xref: /linux/Documentation/netlink/specs/netdev.yaml (revision d603517771d8e08a2d8fc9e1f7682ce393d3973a)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc: >-
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc: >-
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc: >-
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc: >-
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc: >-
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc: >-
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc: >-
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc: >-
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc: >-
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc: >-
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc: >-
79          Launch time HW offload is supported by the driver.
80  -
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88  -
89    name: napi-threaded
90    type: enum
91    entries: [disabled, enabled, busy-poll]
92
93attribute-sets:
94  -
95    name: dev
96    attributes:
97      -
98        name: ifindex
99        doc: netdev ifindex
100        type: u32
101        checks:
102          min: 1
103      -
104        name: pad
105        type: pad
106      -
107        name: xdp-features
108        doc: Bitmask of enabled xdp-features.
109        type: u64
110        enum: xdp-act
111      -
112        name: xdp-zc-max-segs
113        doc: max fragment count supported by ZC driver
114        type: u32
115        checks:
116          min: 1
117      -
118        name: xdp-rx-metadata-features
119        doc: Bitmask of supported XDP receive metadata features.
120             See Documentation/networking/xdp-rx-metadata.rst for more details.
121        type: u64
122        enum: xdp-rx-metadata
123      -
124        name: xsk-features
125        doc: Bitmask of enabled AF_XDP features.
126        type: u64
127        enum: xsk-flags
128  -
129    name: io-uring-provider-info
130    attributes: []
131  -
132    name: page-pool
133    attributes:
134      -
135        name: id
136        doc: Unique ID of a Page Pool instance.
137        type: uint
138        checks:
139          min: 1
140          max: u32-max
141      -
142        name: ifindex
143        doc: |
144          ifindex of the netdev to which the pool belongs.
145          May not be reported if the page pool was allocated for a netdev
146          which got destroyed already (page pools may outlast their netdevs
147          because they wait for all memory to be returned).
148        type: u32
149        checks:
150          min: 1
151          max: s32-max
152      -
153        name: napi-id
154        doc: Id of NAPI using this Page Pool instance.
155        type: uint
156        checks:
157          min: 1
158          max: u32-max
159      -
160        name: inflight
161        type: uint
162        doc: |
163          Number of outstanding references to this page pool (allocated
164          but yet to be freed pages). Allocated pages may be held in
165          socket receive queues, driver receive ring, page pool recycling
166          ring, the page pool cache, etc.
167      -
168        name: inflight-mem
169        type: uint
170        doc: |
171          Amount of memory held by inflight pages.
172      -
173        name: detach-time
174        type: uint
175        doc: |
176          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
177          the driver. Once detached Page Pool can no longer be used to
178          allocate memory.
179          Page Pools wait for all the memory allocated from them to be freed
180          before truly disappearing. "Detached" Page Pools cannot be
181          "re-attached", they are just waiting to disappear.
182          Attribute is absent if Page Pool has not been detached, and
183          can still be used to allocate new memory.
184      -
185        name: dmabuf
186        doc: ID of the dmabuf this page-pool is attached to.
187        type: u32
188      -
189        name: io-uring
190        doc: io_uring memory provider information.
191        type: nest
192        nested-attributes: io-uring-provider-info
193  -
194    name: page-pool-info
195    subset-of: page-pool
196    attributes:
197      -
198        name: id
199      -
200        name: ifindex
201  -
202    name: page-pool-stats
203    doc: |
204      Page pool statistics, see docs for struct page_pool_stats
205      for information about individual statistics.
206    attributes:
207      -
208        name: info
209        doc: Page pool identifying information.
210        type: nest
211        nested-attributes: page-pool-info
212      -
213        name: alloc-fast
214        type: uint
215        value: 8  # reserve some attr ids in case we need more metadata later
216      -
217        name: alloc-slow
218        type: uint
219      -
220        name: alloc-slow-high-order
221        type: uint
222      -
223        name: alloc-empty
224        type: uint
225      -
226        name: alloc-refill
227        type: uint
228      -
229        name: alloc-waive
230        type: uint
231      -
232        name: recycle-cached
233        type: uint
234      -
235        name: recycle-cache-full
236        type: uint
237      -
238        name: recycle-ring
239        type: uint
240      -
241        name: recycle-ring-full
242        type: uint
243      -
244        name: recycle-released-refcnt
245        type: uint
246
247  -
248    name: napi
249    attributes:
250      -
251        name: ifindex
252        doc: ifindex of the netdevice to which NAPI instance belongs.
253        type: u32
254        checks:
255          min: 1
256      -
257        name: id
258        doc: ID of the NAPI instance.
259        type: u32
260      -
261        name: irq
262        doc: The associated interrupt vector number for the NAPI instance.
263        type: u32
264      -
265        name: pid
266        doc: PID of the napi thread, if NAPI is configured to operate in
267             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
268             softirq context), the attribute will be absent.
269        type: u32
270      -
271        name: defer-hard-irqs
272        doc: The number of consecutive empty polls before IRQ deferral ends
273             and hardware IRQs are re-enabled.
274        type: u32
275        checks:
276          max: s32-max
277      -
278        name: gro-flush-timeout
279        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
280             timer which schedules NAPI processing. Additionally, a non-zero
281             value will also prevent GRO from flushing recent super-frames at
282             the end of a NAPI cycle. This may add receive latency in exchange
283             for reducing the number of frames processed by the network stack.
284        type: uint
285      -
286        name: irq-suspend-timeout
287        doc: The timeout, in nanoseconds, of how long to suspend irq
288             processing, if event polling finds events
289        type: uint
290      -
291        name: threaded
292        doc: Whether the NAPI is configured to operate in threaded polling
293             mode. If this is set to enabled then the NAPI context operates
294             in threaded polling mode. If this is set to busy-poll, then the
295             threaded polling mode also busy polls.
296        type: u32
297        enum: napi-threaded
298  -
299    name: xsk-info
300    attributes: []
301  -
302    name: queue
303    attributes:
304      -
305        name: id
306        doc: Queue index; most queue types are indexed like a C array, with
307             indexes starting at 0 and ending at queue count - 1. Queue indexes
308             are scoped to an interface and queue type.
309        type: u32
310      -
311        name: ifindex
312        doc: ifindex of the netdevice to which the queue belongs.
313        type: u32
314        checks:
315          min: 1
316      -
317        name: type
318        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
319             XDP TX queues allocated in the kernel are not linked to NAPIs and
320             thus not listed. AF_XDP queues will have more information set in
321             the xsk attribute.
322        type: u32
323        enum: queue-type
324      -
325        name: napi-id
326        doc: ID of the NAPI instance which services this queue.
327        type: u32
328      -
329        name: dmabuf
330        doc: ID of the dmabuf attached to this queue, if any.
331        type: u32
332      -
333        name: io-uring
334        doc: io_uring memory provider information.
335        type: nest
336        nested-attributes: io-uring-provider-info
337      -
338        name: xsk
339        doc: XSK information for this queue, if any.
340        type: nest
341        nested-attributes: xsk-info
342      -
343        name: lease
344        doc: |
345          A queue from a virtual device can have a lease which refers to
346          another queue from a physical device. This is useful for memory
347          providers and AF_XDP operations which take an ifindex and queue id
348          to allow applications to bind against virtual devices in containers.
349        type: nest
350        nested-attributes: lease
351  -
352    name: qstats
353    doc: |
354      Get device statistics, scoped to a device or a queue.
355      These statistics extend (and partially duplicate) statistics available
356      in struct rtnl_link_stats64.
357      Value of the `scope` attribute determines how statistics are
358      aggregated. When aggregated for the entire device the statistics
359      represent the total number of events since last explicit reset of
360      the device (i.e. not a reconfiguration like changing queue count).
361      When reported per-queue, however, the statistics may not add
362      up to the total number of events, will only be reported for currently
363      active objects, and will likely report the number of events since last
364      reconfiguration.
365    attributes:
366      -
367        name: ifindex
368        doc: ifindex of the netdevice to which stats belong.
369        type: u32
370        checks:
371          min: 1
372      -
373        name: queue-type
374        doc: Queue type as rx, tx, for queue-id.
375        type: u32
376        enum: queue-type
377      -
378        name: queue-id
379        doc: Queue ID, if stats are scoped to a single queue instance.
380        type: u32
381      -
382        name: scope
383        doc: |
384          What object type should be used to iterate over the stats.
385        type: uint
386        enum: qstats-scope
387      -
388        name: rx-packets
389        doc: |
390          Number of wire packets successfully received and passed to the stack.
391          For drivers supporting XDP, XDP is considered the first layer
392          of the stack, so packets consumed by XDP are still counted here.
393        type: uint
394        value: 8  # reserve some attr ids in case we need more metadata later
395      -
396        name: rx-bytes
397        doc: Successfully received bytes, see `rx-packets`.
398        type: uint
399      -
400        name: tx-packets
401        doc: |
402          Number of wire packets successfully sent. Packet is considered to be
403          successfully sent once it is in device memory (usually this means
404          the device has issued a DMA completion for the packet).
405        type: uint
406      -
407        name: tx-bytes
408        doc: Successfully sent bytes, see `tx-packets`.
409        type: uint
410      -
411        name: rx-alloc-fail
412        doc: |
413          Number of times skb or buffer allocation failed on the Rx datapath.
414          Allocation failure may, or may not result in a packet drop, depending
415          on driver implementation and whether system recovers quickly.
416        type: uint
417      -
418        name: rx-hw-drops
419        doc: |
420          Number of all packets which entered the device, but never left it,
421          including but not limited to: packets dropped due to lack of buffer
422          space, processing errors, explicit or implicit policies and packet
423          filters.
424        type: uint
425      -
426        name: rx-hw-drop-overruns
427        doc: |
428          Number of packets dropped due to transient lack of resources, such as
429          buffer space, host descriptors etc.
430        type: uint
431      -
432        name: rx-csum-complete
433        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
434        type: uint
435      -
436        name: rx-csum-unnecessary
437        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
438        type: uint
439      -
440        name: rx-csum-none
441        doc: Number of packets that were not checksummed by device.
442        type: uint
443      -
444        name: rx-csum-bad
445        doc: |
446          Number of packets with bad checksum. The packets are not discarded,
447          but still delivered to the stack.
448        type: uint
449      -
450        name: rx-hw-gro-packets
451        doc: |
452          Number of packets that were coalesced from smaller packets by the
453          device. Counts only packets coalesced with the HW-GRO netdevice
454          feature, LRO-coalesced packets are not counted.
455        type: uint
456      -
457        name: rx-hw-gro-bytes
458        doc: See `rx-hw-gro-packets`.
459        type: uint
460      -
461        name: rx-hw-gro-wire-packets
462        doc: |
463          Number of packets that were coalesced to bigger packets with the
464          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
465        type: uint
466      -
467        name: rx-hw-gro-wire-bytes
468        doc: See `rx-hw-gro-wire-packets`.
469        type: uint
470      -
471        name: rx-hw-drop-ratelimits
472        doc: |
473          Number of the packets dropped by the device due to the received
474          packets bitrate exceeding the device rate limit.
475        type: uint
476      -
477        name: tx-hw-drops
478        doc: |
479          Number of packets that arrived at the device but never left it,
480          encompassing packets dropped for reasons such as processing errors, as
481          well as those affected by explicitly defined policies and packet
482          filtering criteria.
483        type: uint
484      -
485        name: tx-hw-drop-errors
486        doc: Number of packets dropped because they were invalid or malformed.
487        type: uint
488      -
489        name: tx-csum-none
490        doc: |
491          Number of packets that did not require the device to calculate the
492          checksum.
493        type: uint
494      -
495        name: tx-needs-csum
496        doc: |
497          Number of packets that required the device to calculate the checksum.
498          This counter includes the number of GSO wire packets for which device
499          calculated the L4 checksum.
500        type: uint
501      -
502        name: tx-hw-gso-packets
503        doc: |
504          Number of packets that necessitated segmentation into smaller packets
505          by the device.
506        type: uint
507      -
508        name: tx-hw-gso-bytes
509        doc: See `tx-hw-gso-packets`.
510        type: uint
511      -
512        name: tx-hw-gso-wire-packets
513        doc: |
514          Number of wire-sized packets generated by processing
515          `tx-hw-gso-packets`
516        type: uint
517      -
518        name: tx-hw-gso-wire-bytes
519        doc: See `tx-hw-gso-wire-packets`.
520        type: uint
521      -
522        name: tx-hw-drop-ratelimits
523        doc: |
524          Number of the packets dropped by the device due to the transmit
525          packets bitrate exceeding the device rate limit.
526        type: uint
527      -
528        name: tx-stop
529        doc: |
530          Number of times driver paused accepting new tx packets
531          from the stack to this queue, because the queue was full.
532          Note that if BQL is supported and enabled on the device
533          the networking stack will avoid queuing a lot of data at once.
534        type: uint
535      -
536        name: tx-wake
537        doc: |
538          Number of times driver re-started accepting send
539          requests to this queue from the stack.
540        type: uint
541  -
542    name: queue-id
543    subset-of: queue
544    attributes:
545      -
546        name: id
547      -
548        name: type
549  -
550    name: lease
551    attributes:
552      -
553        name: ifindex
554        doc: The netdev ifindex to lease the queue from.
555        type: u32
556        checks:
557          min: 1
558      -
559        name: queue
560        doc: The netdev queue to lease from.
561        type: nest
562        nested-attributes: queue-id
563      -
564        name: netns-id
565        doc: The network namespace id of the netdev.
566        type: s32
567        checks:
568          min: 0
569  -
570    name: dmabuf
571    attributes:
572      -
573        name: ifindex
574        doc: netdev ifindex to bind the dmabuf to.
575        type: u32
576        checks:
577          min: 1
578      -
579        name: queues
580        doc: receive queues to bind the dmabuf to.
581        type: nest
582        nested-attributes: queue-id
583        multi-attr: true
584      -
585        name: fd
586        doc: dmabuf file descriptor to bind.
587        type: u32
588      -
589        name: id
590        doc: id of the dmabuf binding
591        type: u32
592        checks:
593          min: 1
594
595operations:
596  list:
597    -
598      name: dev-get
599      doc: Get / dump information about a netdev.
600      attribute-set: dev
601      do:
602        request:
603          attributes:
604            - ifindex
605        reply: &dev-all
606          attributes:
607            - ifindex
608            - xdp-features
609            - xdp-zc-max-segs
610            - xdp-rx-metadata-features
611            - xsk-features
612      dump:
613        reply: *dev-all
614    -
615      name: dev-add-ntf
616      doc: Notification about device appearing.
617      notify: dev-get
618      mcgrp: mgmt
619    -
620      name: dev-del-ntf
621      doc: Notification about device disappearing.
622      notify: dev-get
623      mcgrp: mgmt
624    -
625      name: dev-change-ntf
626      doc: Notification about device configuration being changed.
627      notify: dev-get
628      mcgrp: mgmt
629    -
630      name: page-pool-get
631      doc: |
632        Get / dump information about Page Pools.
633        Only Page Pools associated by the driver with a net_device
634        can be listed. ifindex will not be reported if the net_device
635        no longer exists.
636      attribute-set: page-pool
637      do:
638        request:
639          attributes:
640            - id
641        reply: &pp-reply
642          attributes:
643            - id
644            - ifindex
645            - napi-id
646            - inflight
647            - inflight-mem
648            - detach-time
649            - dmabuf
650            - io-uring
651      dump:
652        request:
653          attributes:
654            - ifindex
655        reply: *pp-reply
656      config-cond: page-pool
657    -
658      name: page-pool-add-ntf
659      doc: Notification about page pool appearing.
660      notify: page-pool-get
661      mcgrp: page-pool
662      config-cond: page-pool
663    -
664      name: page-pool-del-ntf
665      doc: Notification about page pool disappearing.
666      notify: page-pool-get
667      mcgrp: page-pool
668      config-cond: page-pool
669    -
670      name: page-pool-change-ntf
671      doc: Notification about page pool configuration being changed.
672      notify: page-pool-get
673      mcgrp: page-pool
674      config-cond: page-pool
675    -
676      name: page-pool-stats-get
677      doc: Get page pool statistics.
678      attribute-set: page-pool-stats
679      do:
680        request:
681          attributes:
682            - info
683        reply: &pp-stats-reply
684          attributes:
685            - info
686            - alloc-fast
687            - alloc-slow
688            - alloc-slow-high-order
689            - alloc-empty
690            - alloc-refill
691            - alloc-waive
692            - recycle-cached
693            - recycle-cache-full
694            - recycle-ring
695            - recycle-ring-full
696            - recycle-released-refcnt
697      dump:
698        request:
699          attributes:
700            - info
701        reply: *pp-stats-reply
702      config-cond: page-pool-stats
703    -
704      name: queue-get
705      doc: Get queue information from the kernel.
706           Only configured queues will be reported (as opposed to all available
707           hardware queues).
708      attribute-set: queue
709      do:
710        request:
711          attributes:
712            - ifindex
713            - type
714            - id
715        reply: &queue-get-op
716          attributes:
717            - id
718            - type
719            - napi-id
720            - ifindex
721            - dmabuf
722            - io-uring
723            - xsk
724            - lease
725      dump:
726        request:
727          attributes:
728            - ifindex
729        reply: *queue-get-op
730    -
731      name: napi-get
732      doc: Get information about NAPI instances configured on the system.
733      attribute-set: napi
734      do:
735        request:
736          attributes:
737            - id
738        reply: &napi-get-op
739          attributes:
740            - id
741            - ifindex
742            - irq
743            - pid
744            - defer-hard-irqs
745            - gro-flush-timeout
746            - irq-suspend-timeout
747            - threaded
748      dump:
749        request:
750          attributes:
751            - ifindex
752        reply: *napi-get-op
753    -
754      name: qstats-get
755      doc: |
756        Get / dump fine grained statistics. Which statistics are reported
757        depends on the device and the driver, and whether the driver stores
758        software counters per-queue.
759      attribute-set: qstats
760      dump:
761        request:
762          attributes:
763            - ifindex
764            - scope
765        reply:
766          attributes:
767            - ifindex
768            - queue-type
769            - queue-id
770            - rx-packets
771            - rx-bytes
772            - tx-packets
773            - tx-bytes
774            - rx-alloc-fail
775            - rx-hw-drops
776            - rx-hw-drop-overruns
777            - rx-csum-complete
778            - rx-csum-unnecessary
779            - rx-csum-none
780            - rx-csum-bad
781            - rx-hw-gro-packets
782            - rx-hw-gro-bytes
783            - rx-hw-gro-wire-packets
784            - rx-hw-gro-wire-bytes
785            - rx-hw-drop-ratelimits
786            - tx-hw-drops
787            - tx-hw-drop-errors
788            - tx-csum-none
789            - tx-needs-csum
790            - tx-hw-gso-packets
791            - tx-hw-gso-bytes
792            - tx-hw-gso-wire-packets
793            - tx-hw-gso-wire-bytes
794            - tx-hw-drop-ratelimits
795            - tx-stop
796            - tx-wake
797    -
798      name: bind-rx
799      doc: Bind dmabuf to netdev
800      attribute-set: dmabuf
801      flags: [admin-perm]
802      do:
803        request:
804          attributes:
805            - ifindex
806            - fd
807            - queues
808        reply:
809          attributes:
810            - id
811    -
812      name: napi-set
813      doc: Set configurable NAPI instance settings.
814      attribute-set: napi
815      flags: [admin-perm]
816      do:
817        request:
818          attributes:
819            - id
820            - defer-hard-irqs
821            - gro-flush-timeout
822            - irq-suspend-timeout
823            - threaded
824    -
825      name: bind-tx
826      doc: Bind dmabuf to netdev for TX
827      attribute-set: dmabuf
828      do:
829        request:
830          attributes:
831            - ifindex
832            - fd
833        reply:
834          attributes:
835            - id
836    -
837      name: queue-create
838      doc: |
839        Create a new queue for the given netdevice. Whether this operation
840        is supported depends on the device and the driver.
841      attribute-set: queue
842      flags: [admin-perm]
843      do:
844        request:
845          attributes:
846            - ifindex
847            - type
848            - lease
849        reply: &queue-create-op
850          attributes:
851            - id
852
853kernel-family:
854  headers: ["net/netdev_netlink.h"]
855  sock-priv: struct netdev_nl_sock
856
857mcast-groups:
858  list:
859    -
860      name: mgmt
861    -
862      name: page-pool
863