xref: /linux/Documentation/netlink/specs/netdev.yaml (revision d639d9fa162aadec1ae9980c4dcf6e50bd2f8290)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc: >-
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc: >-
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc: >-
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc: >-
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc: >-
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc: >-
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc: >-
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc: >-
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc: >-
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc: >-
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc: >-
79          Launch time HW offload is supported by the driver.
80  -
    # Direction of a netdev queue (receive or transmit). Referenced as the
    # enum of `type` in the `queue` set and `queue-type` in the `qstats` set.
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
    # Scope selector for queue statistics. Used as the enum of the `scope`
    # attribute in the `qstats` set; `queue` is the only flag defined here.
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88  -
    # Threaded-polling mode of a NAPI instance. Used as the enum of the
    # `threaded` attribute in the `napi` set, whose doc describes `enabled`
    # (threaded polling) and `busy-poll` (threaded polling that also busy
    # polls).
89    name: napi-threaded
90    type: enum
91    entries: [disabled, enabled, busy-poll]
92
93attribute-sets:
94  -
95    name: dev
96    attributes:
97      -
98        name: ifindex
99        doc: netdev ifindex
100        type: u32
101        checks:
102          min: 1
103      -
104        name: pad
105        type: pad
106      -
107        name: xdp-features
108        doc: Bitmask of enabled xdp-features.
109        type: u64
110        enum: xdp-act
111      -
112        name: xdp-zc-max-segs
113        doc: max fragment count supported by ZC driver
114        type: u32
115        checks:
116          min: 1
117      -
118        name: xdp-rx-metadata-features
119        doc: Bitmask of supported XDP receive metadata features.
120             See Documentation/networking/xdp-rx-metadata.rst for more details.
121        type: u64
122        enum: xdp-rx-metadata
123      -
124        name: xsk-features
125        doc: Bitmask of enabled AF_XDP features.
126        type: u64
127        enum: xsk-flags
128  -
129    name: io-uring-provider-info
130    attributes:
131      -
132        name: rx-buf-len
133        type: uint
134        doc: |
135          RX buffer length in bytes for this io_uring memory provider.
136          Reflects the rx_buf_len passed at io_uring zerocopy rx
137          registration time.
138  -
139    name: page-pool
140    attributes:
141      -
142        name: id
143        doc: Unique ID of a Page Pool instance.
144        type: uint
145        checks:
146          min: 1
147          max: u32-max
148      -
149        name: ifindex
150        doc: |
151          ifindex of the netdev to which the pool belongs.
152          May not be reported if the page pool was allocated for a netdev
153          which got destroyed already (page pools may outlast their netdevs
154          because they wait for all memory to be returned).
155        type: u32
156        checks:
157          min: 1
158          max: s32-max
159      -
160        name: napi-id
161        doc: Id of NAPI using this Page Pool instance.
162        type: uint
163        checks:
164          min: 1
165          max: u32-max
166      -
167        name: inflight
168        type: uint
169        doc: |
170          Number of outstanding references to this page pool (allocated
171          but yet to be freed pages). Allocated pages may be held in
172          socket receive queues, driver receive ring, page pool recycling
173          ring, the page pool cache, etc.
174      -
175        name: inflight-mem
176        type: uint
177        doc: |
178          Amount of memory held by inflight pages.
179      -
180        name: detach-time
181        type: uint
182        doc: |
183          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
184          the driver. Once detached Page Pool can no longer be used to
185          allocate memory.
186          Page Pools wait for all the memory allocated from them to be freed
187          before truly disappearing. "Detached" Page Pools cannot be
188          "re-attached", they are just waiting to disappear.
189          Attribute is absent if Page Pool has not been detached, and
190          can still be used to allocate new memory.
191      -
192        name: dmabuf
193        doc: ID of the dmabuf this page-pool is attached to.
194        type: u32
195      -
196        name: io-uring
197        doc: io-uring memory provider information.
198        type: nest
199        nested-attributes: io-uring-provider-info
200  -
    # Identifying subset of the `page-pool` set (id + ifindex). Nested under
    # the `info` attribute of the `page-pool-stats` set.
201    name: page-pool-info
202    subset-of: page-pool
203    attributes:
204      -
205        name: id
206      -
207        name: ifindex
208  -
209    name: page-pool-stats
210    doc: |
211      Page pool statistics, see docs for struct page_pool_stats
212      for information about individual statistics.
213    attributes:
214      -
215        name: info
216        doc: Page pool identifying information.
217        type: nest
218        nested-attributes: page-pool-info
219      -
220        name: alloc-fast
221        type: uint
222        value: 8  # reserve some attr ids in case we need more metadata later
223      -
224        name: alloc-slow
225        type: uint
226      -
227        name: alloc-slow-high-order
228        type: uint
229      -
230        name: alloc-empty
231        type: uint
232      -
233        name: alloc-refill
234        type: uint
235      -
236        name: alloc-waive
237        type: uint
238      -
239        name: recycle-cached
240        type: uint
241      -
242        name: recycle-cache-full
243        type: uint
244      -
245        name: recycle-ring
246        type: uint
247      -
248        name: recycle-ring-full
249        type: uint
250      -
251        name: recycle-released-refcnt
252        type: uint
253
254  -
255    name: napi
256    attributes:
257      -
258        name: ifindex
259        doc: ifindex of the netdevice to which NAPI instance belongs.
260        type: u32
261        checks:
262          min: 1
263      -
264        name: id
265        doc: ID of the NAPI instance.
266        type: u32
267      -
268        name: irq
269        doc: The associated interrupt vector number for the napi
270        type: u32
271      -
272        name: pid
273        doc: PID of the napi thread, if NAPI is configured to operate in
274             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
275             softirq context), the attribute will be absent.
276        type: u32
277      -
278        name: defer-hard-irqs
279        doc: The number of consecutive empty polls before IRQ deferral ends
280             and hardware IRQs are re-enabled.
281        type: u32
282        checks:
283          max: s32-max
284      -
285        name: gro-flush-timeout
286        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
287             timer which schedules NAPI processing. Additionally, a non-zero
288             value will also prevent GRO from flushing recent super-frames at
289             the end of a NAPI cycle. This may add receive latency in exchange
290             for reducing the number of frames processed by the network stack.
291        type: uint
292      -
293        name: irq-suspend-timeout
294        doc: The timeout, in nanoseconds, of how long to suspend irq
295             processing, if event polling finds events
296        type: uint
297      -
298        name: threaded
299        doc: Whether the NAPI is configured to operate in threaded polling
300             mode. If this is set to enabled then the NAPI context operates
301             in threaded polling mode. If this is set to busy-poll, then the
302             threaded polling mode also busy polls.
303        type: u32
304        enum: napi-threaded
305  -
    # Empty attribute set used as the nest target of the `xsk` attribute in
    # the `queue` set, so that nest currently carries no attributes of its
    # own.
306    name: xsk-info
307    attributes: []
308  -
309    name: queue
310    attributes:
311      -
312        name: id
313        doc: Queue index; most queue types are indexed like a C array, with
314             indexes starting at 0 and ending at queue count - 1. Queue indexes
315             are scoped to an interface and queue type.
316        type: u32
317      -
318        name: ifindex
319        doc: ifindex of the netdevice to which the queue belongs.
320        type: u32
321        checks:
322          min: 1
323      -
324        name: type
325        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
326             XDP TX queues allocated in the kernel are not linked to NAPIs and
327             thus not listed. AF_XDP queues will have more information set in
328             the xsk attribute.
329        type: u32
330        enum: queue-type
331      -
332        name: napi-id
333        doc: ID of the NAPI instance which services this queue.
334        type: u32
335      -
336        name: dmabuf
337        doc: ID of the dmabuf attached to this queue, if any.
338        type: u32
339      -
340        name: io-uring
341        doc: io_uring memory provider information.
342        type: nest
343        nested-attributes: io-uring-provider-info
344      -
345        name: xsk
346        doc: XSK information for this queue, if any.
347        type: nest
348        nested-attributes: xsk-info
349      -
350        name: lease
351        doc: |
352          A queue from a virtual device can have a lease which refers to
353          another queue from a physical device. This is useful for memory
354          providers and AF_XDP operations which take an ifindex and queue id
355          to allow applications to bind against virtual devices in containers.
356        type: nest
357        nested-attributes: lease
358  -
359    name: qstats
360    doc: |
361      Get device statistics, scoped to a device or a queue.
362      These statistics extend (and partially duplicate) statistics available
363      in struct rtnl_link_stats64.
364      Value of the `scope` attribute determines how statistics are
365      aggregated. When aggregated for the entire device the statistics
366      represent the total number of events since last explicit reset of
367      the device (i.e. not a reconfiguration like changing queue count).
368      When reported per-queue, however, the statistics may not add
369      up to the total number of events, will only be reported for currently
370      active objects, and will likely report the number of events since last
371      reconfiguration.
372    attributes:
373      -
374        name: ifindex
375        doc: ifindex of the netdevice to which stats belong.
376        type: u32
377        checks:
378          min: 1
379      -
380        name: queue-type
381        doc: Queue type as rx, tx, for queue-id.
382        type: u32
383        enum: queue-type
384      -
385        name: queue-id
386        doc: Queue ID, if stats are scoped to a single queue instance.
387        type: u32
388      -
389        name: scope
390        doc: |
391          What object type should be used to iterate over the stats.
392        type: uint
393        enum: qstats-scope
394      -
395        name: rx-packets
396        doc: |
397          Number of wire packets successfully received and passed to the stack.
398          For drivers supporting XDP, XDP is considered the first layer
399          of the stack, so packets consumed by XDP are still counted here.
400        type: uint
401        value: 8  # reserve some attr ids in case we need more metadata later
402      -
403        name: rx-bytes
404        doc: Successfully received bytes, see `rx-packets`.
405        type: uint
406      -
407        name: tx-packets
408        doc: |
409          Number of wire packets successfully sent. Packet is considered to be
410          successfully sent once it is in device memory (usually this means
411          the device has issued a DMA completion for the packet).
412        type: uint
413      -
414        name: tx-bytes
415        doc: Successfully sent bytes, see `tx-packets`.
416        type: uint
417      -
418        name: rx-alloc-fail
419        doc: |
420          Number of times skb or buffer allocation failed on the Rx datapath.
421          Allocation failure may, or may not result in a packet drop, depending
422          on driver implementation and whether system recovers quickly.
423        type: uint
424      -
425        name: rx-hw-drops
426        doc: |
427          Number of all packets which entered the device, but never left it,
428          including but not limited to: packets dropped due to lack of buffer
429          space, processing errors, explicit or implicit policies and packet
430          filters.
431        type: uint
432      -
433        name: rx-hw-drop-overruns
434        doc: |
435          Number of packets dropped due to transient lack of resources, such as
436          buffer space, host descriptors etc.
437        type: uint
438      -
439        name: rx-csum-complete
440        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
441        type: uint
442      -
443        name: rx-csum-unnecessary
444        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
445        type: uint
446      -
447        name: rx-csum-none
448        doc: Number of packets that were not checksummed by device.
449        type: uint
450      -
451        name: rx-csum-bad
452        doc: |
453          Number of packets with bad checksum. The packets are not discarded,
454          but still delivered to the stack.
455        type: uint
456      -
457        name: rx-hw-gro-packets
458        doc: |
459          Number of packets that were coalesced from smaller packets by the
460          device. Counts only packets coalesced with the HW-GRO netdevice
461          feature, LRO-coalesced packets are not counted.
462        type: uint
463      -
464        name: rx-hw-gro-bytes
465        doc: See `rx-hw-gro-packets`.
466        type: uint
467      -
468        name: rx-hw-gro-wire-packets
469        doc: |
470          Number of packets that were coalesced to bigger packets with the
471          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
472        type: uint
473      -
474        name: rx-hw-gro-wire-bytes
475        doc: See `rx-hw-gro-wire-packets`.
476        type: uint
477      -
478        name: rx-hw-drop-ratelimits
479        doc: |
480          Number of the packets dropped by the device due to the received
481          packets bitrate exceeding the device rate limit.
482        type: uint
483      -
484        name: tx-hw-drops
485        doc: |
486          Number of packets that arrived at the device but never left it,
487          encompassing packets dropped for reasons such as processing errors, as
488          well as those affected by explicitly defined policies and packet
489          filtering criteria.
490        type: uint
491      -
492        name: tx-hw-drop-errors
493        doc: Number of packets dropped because they were invalid or malformed.
494        type: uint
495      -
496        name: tx-csum-none
497        doc: |
498          Number of packets that did not require the device to calculate the
499          checksum.
500        type: uint
501      -
502        name: tx-needs-csum
503        doc: |
504          Number of packets that required the device to calculate the checksum.
505          This counter includes the number of GSO wire packets for which device
506          calculated the L4 checksum.
507        type: uint
508      -
509        name: tx-hw-gso-packets
510        doc: |
511          Number of packets that necessitated segmentation into smaller packets
512          by the device.
513        type: uint
514      -
515        name: tx-hw-gso-bytes
516        doc: See `tx-hw-gso-packets`.
517        type: uint
518      -
519        name: tx-hw-gso-wire-packets
520        doc: |
521          Number of wire-sized packets generated by processing
522          `tx-hw-gso-packets`
523        type: uint
524      -
525        name: tx-hw-gso-wire-bytes
526        doc: See `tx-hw-gso-wire-packets`.
527        type: uint
528      -
529        name: tx-hw-drop-ratelimits
530        doc: |
531          Number of the packets dropped by the device due to the transmit
532          packets bitrate exceeding the device rate limit.
533        type: uint
534      -
535        name: tx-stop
536        doc: |
537          Number of times driver paused accepting new tx packets
538          from the stack to this queue, because the queue was full.
539          Note that if BQL is supported and enabled on the device
540          the networking stack will avoid queuing a lot of data at once.
541        type: uint
542      -
543        name: tx-wake
544        doc: |
545          Number of times driver re-started accepting send
546          requests to this queue from the stack.
547        type: uint
548  -
    # Subset of the `queue` set that identifies a queue by index and type.
    # Nested under `queue` in the `lease` set and under the multi-attr
    # `queues` in the `dmabuf` set.
549    name: queue-id
550    subset-of: queue
551    attributes:
552      -
553        name: id
554      -
555        name: type
556  -
557    name: lease
558    attributes:
559      -
560        name: ifindex
561        doc: The netdev ifindex to lease the queue from.
562        type: u32
563        checks:
564          min: 1
565      -
566        name: queue
567        doc: The netdev queue to lease from.
568        type: nest
569        nested-attributes: queue-id
570      -
571        name: netns-id
572        doc: The network namespace id of the netdev.
573        type: s32
574        checks:
575          min: 0
576  -
577    name: dmabuf
578    attributes:
579      -
580        name: ifindex
581        doc: netdev ifindex to bind the dmabuf to.
582        type: u32
583        checks:
584          min: 1
585      -
586        name: queues
587        doc: receive queues to bind the dmabuf to.
588        type: nest
589        nested-attributes: queue-id
590        multi-attr: true
591      -
592        name: fd
593        doc: dmabuf file descriptor to bind.
594        type: u32
595      -
596        name: id
597        doc: id of the dmabuf binding
598        type: u32
599        checks:
600          min: 1
601
602operations:
603  list:
604    -
605      name: dev-get
606      doc: Get / dump information about a netdev.
607      attribute-set: dev
608      do:
609        request:
610          attributes:
611            - ifindex
612        reply: &dev-all
613          attributes:
614            - ifindex
615            - xdp-features
616            - xdp-zc-max-segs
617            - xdp-rx-metadata-features
618            - xsk-features
619      dump:
620        reply: *dev-all
621    -
622      name: dev-add-ntf
623      doc: Notification about device appearing.
624      notify: dev-get
625      mcgrp: mgmt
626    -
627      name: dev-del-ntf
628      doc: Notification about device disappearing.
629      notify: dev-get
630      mcgrp: mgmt
631    -
632      name: dev-change-ntf
633      doc: Notification about device configuration being changed.
634      notify: dev-get
635      mcgrp: mgmt
636    -
637      name: page-pool-get
638      doc: |
639        Get / dump information about Page Pools.
640        Only Page Pools associated by the driver with a net_device
641        can be listed. ifindex will not be reported if the net_device
642        no longer exists.
643      attribute-set: page-pool
644      do:
645        request:
646          attributes:
647            - id
648        reply: &pp-reply
649          attributes:
650            - id
651            - ifindex
652            - napi-id
653            - inflight
654            - inflight-mem
655            - detach-time
656            - dmabuf
657            - io-uring
658      dump:
659        request:
660          attributes:
661            - ifindex
662        reply: *pp-reply
663      config-cond: page-pool
664    -
665      name: page-pool-add-ntf
666      doc: Notification about page pool appearing.
667      notify: page-pool-get
668      mcgrp: page-pool
669      config-cond: page-pool
670    -
671      name: page-pool-del-ntf
672      doc: Notification about page pool disappearing.
673      notify: page-pool-get
674      mcgrp: page-pool
675      config-cond: page-pool
676    -
677      name: page-pool-change-ntf
678      doc: Notification about page pool configuration being changed.
679      notify: page-pool-get
680      mcgrp: page-pool
681      config-cond: page-pool
682    -
683      name: page-pool-stats-get
684      doc: Get page pool statistics.
685      attribute-set: page-pool-stats
686      do:
687        request:
688          attributes:
689            - info
690        reply: &pp-stats-reply
691          attributes:
692            - info
693            - alloc-fast
694            - alloc-slow
695            - alloc-slow-high-order
696            - alloc-empty
697            - alloc-refill
698            - alloc-waive
699            - recycle-cached
700            - recycle-cache-full
701            - recycle-ring
702            - recycle-ring-full
703            - recycle-released-refcnt
704      dump:
705        request:
706          attributes:
707            - info
708        reply: *pp-stats-reply
709      config-cond: page-pool-stats
710    -
711      name: queue-get
712      doc: Get queue information from the kernel.
713           Only configured queues will be reported (as opposed to all available
714           hardware queues).
715      attribute-set: queue
716      do:
717        request:
718          attributes:
719            - ifindex
720            - type
721            - id
722        reply: &queue-get-op
723          attributes:
724            - id
725            - type
726            - napi-id
727            - ifindex
728            - dmabuf
729            - io-uring
730            - xsk
731            - lease
732      dump:
733        request:
734          attributes:
735            - ifindex
736        reply: *queue-get-op
737    -
738      name: napi-get
739      doc: Get information about NAPI instances configured on the system.
740      attribute-set: napi
741      do:
742        request:
743          attributes:
744            - id
745        reply: &napi-get-op
746          attributes:
747            - id
748            - ifindex
749            - irq
750            - pid
751            - defer-hard-irqs
752            - gro-flush-timeout
753            - irq-suspend-timeout
754            - threaded
755      dump:
756        request:
757          attributes:
758            - ifindex
759        reply: *napi-get-op
760    -
761      name: qstats-get
762      doc: |
763        Get / dump fine grained statistics. Which statistics are reported
764        depends on the device and the driver, and whether the driver stores
765        software counters per-queue.
766      attribute-set: qstats
767      dump:
768        request:
769          attributes:
770            - ifindex
771            - scope
772        reply:
773          attributes:
774            - ifindex
775            - queue-type
776            - queue-id
777            - rx-packets
778            - rx-bytes
779            - tx-packets
780            - tx-bytes
781            - rx-alloc-fail
782            - rx-hw-drops
783            - rx-hw-drop-overruns
784            - rx-csum-complete
785            - rx-csum-unnecessary
786            - rx-csum-none
787            - rx-csum-bad
788            - rx-hw-gro-packets
789            - rx-hw-gro-bytes
790            - rx-hw-gro-wire-packets
791            - rx-hw-gro-wire-bytes
792            - rx-hw-drop-ratelimits
793            - tx-hw-drops
794            - tx-hw-drop-errors
795            - tx-csum-none
796            - tx-needs-csum
797            - tx-hw-gso-packets
798            - tx-hw-gso-bytes
799            - tx-hw-gso-wire-packets
800            - tx-hw-gso-wire-bytes
801            - tx-hw-drop-ratelimits
802            - tx-stop
803            - tx-wake
804    -
805      name: bind-rx
806      doc: Bind dmabuf to netdev
807      attribute-set: dmabuf
808      flags: [uns-admin-perm]
809      do:
810        request:
811          attributes:
812            - ifindex
813            - fd
814            - queues
815        reply:
816          attributes:
817            - id
818    -
819      name: napi-set
820      doc: Set configurable NAPI instance settings.
821      attribute-set: napi
822      flags: [admin-perm]
823      do:
824        request:
825          attributes:
826            - id
827            - defer-hard-irqs
828            - gro-flush-timeout
829            - irq-suspend-timeout
830            - threaded
831    -
832      name: bind-tx
833      doc: Bind dmabuf to netdev for TX
834      attribute-set: dmabuf
835      do:
836        request:
837          attributes:
838            - ifindex
839            - fd
840        reply:
841          attributes:
842            - id
843    -
      # Operation on the `queue` attribute set, flagged admin-perm. Only a
      # `do` is defined: the request carries ifindex, type and lease, and
      # the reply carries the queue id.
844      name: queue-create
845      doc: |
846        Create a new queue for the given netdevice. Whether this operation
847        is supported depends on the device and the driver.
848      attribute-set: queue
849      flags: [admin-perm]
850      do:
851        request:
852          attributes:
853            - ifindex
854            - type
855            - lease
        # NOTE(review): the `queue-create-op` anchor below has no alias in
        # the visible part of this file - confirm whether it is needed.
856        reply: &queue-create-op
857          attributes:
858            - id
859
860kernel-family:
  # Settings for the kernel-side generated code. NOTE(review): per the
  # genetlink spec schema, `headers` lists extra headers to include in the
  # generated source and `sock-priv` names the kernel-internal type that
  # holds per-socket state - confirm against the schema.
861  headers: ["net/netdev_netlink.h"]
862  sock-priv: struct netdev_nl_sock
863
864mcast-groups:
  # Multicast groups referenced by the `mcgrp` key of the notifications
  # declared under `operations`.
865  list:
866    -
      # Used by dev-add-ntf, dev-del-ntf and dev-change-ntf.
867      name: mgmt
868    -
      # Used by page-pool-add-ntf, page-pool-del-ntf and
      # page-pool-change-ntf.
869      name: page-pool
870