xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc: >-
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc: >-
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc: >-
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc: >-
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc: >-
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc: >-
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc: >-
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc: >-
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc: >-
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc: >-
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc: >-
79          Launch time HW offload is supported by the driver.
80  -
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88  -
89    name: napi-threaded
90    type: enum
91    entries: [disabled, enabled, busy-poll]
92
93attribute-sets:
94  -
95    name: dev
96    attributes:
97      -
98        name: ifindex
99        doc: netdev ifindex
100        type: u32
101        checks:
102          min: 1
103      -
104        name: pad
105        type: pad
106      -
107        name: xdp-features
108        doc: Bitmask of enabled xdp-features.
109        type: u64
110        enum: xdp-act
111      -
112        name: xdp-zc-max-segs
113        doc: max fragment count supported by ZC driver
114        type: u32
115        checks:
116          min: 1
117      -
118        name: xdp-rx-metadata-features
119        doc: Bitmask of supported XDP receive metadata features.
120             See Documentation/networking/xdp-rx-metadata.rst for more details.
121        type: u64
122        enum: xdp-rx-metadata
123      -
124        name: xsk-features
125        doc: Bitmask of enabled AF_XDP features.
126        type: u64
127        enum: xsk-flags
128  -
129    name: io-uring-provider-info
130    attributes: []
131  -
132    name: page-pool
133    attributes:
134      -
135        name: id
136        doc: Unique ID of a Page Pool instance.
137        type: uint
138        checks:
139          min: 1
140          max: u32-max
141      -
142        name: ifindex
143        doc: |
144          ifindex of the netdev to which the pool belongs.
145          May be reported as 0 if the page pool was allocated for a netdev
146          which got destroyed already (page pools may outlast their netdevs
147          because they wait for all memory to be returned).
148        type: u32
149        checks:
150          min: 1
151          max: s32-max
152      -
153        name: napi-id
154        doc: Id of NAPI using this Page Pool instance.
155        type: uint
156        checks:
157          min: 1
158          max: u32-max
159      -
160        name: inflight
161        type: uint
162        doc: |
163          Number of outstanding references to this page pool (allocated
164          but yet to be freed pages). Allocated pages may be held in
165          socket receive queues, driver receive ring, page pool recycling
166          ring, the page pool cache, etc.
167      -
168        name: inflight-mem
169        type: uint
170        doc: |
171          Amount of memory held by inflight pages.
172      -
173        name: detach-time
174        type: uint
175        doc: |
176          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
177          the driver. Once detached Page Pool can no longer be used to
178          allocate memory.
179          Page Pools wait for all the memory allocated from them to be freed
180          before truly disappearing. "Detached" Page Pools cannot be
181          "re-attached", they are just waiting to disappear.
182          Attribute is absent if Page Pool has not been detached, and
183          can still be used to allocate new memory.
184      -
185        name: dmabuf
186        doc: ID of the dmabuf this page-pool is attached to.
187        type: u32
188      -
189        name: io-uring
190        doc: io_uring memory provider information.
191        type: nest
192        nested-attributes: io-uring-provider-info
193  -
194    name: page-pool-info
195    subset-of: page-pool
196    attributes:
197      -
198        name: id
199      -
200        name: ifindex
201  -
202    name: page-pool-stats
203    doc: |
204      Page pool statistics, see docs for struct page_pool_stats
205      for information about individual statistics.
206    attributes:
207      -
208        name: info
209        doc: Page pool identifying information.
210        type: nest
211        nested-attributes: page-pool-info
212      -
213        name: alloc-fast
214        type: uint
215        value: 8  # reserve some attr ids in case we need more metadata later
216      -
217        name: alloc-slow
218        type: uint
219      -
220        name: alloc-slow-high-order
221        type: uint
222      -
223        name: alloc-empty
224        type: uint
225      -
226        name: alloc-refill
227        type: uint
228      -
229        name: alloc-waive
230        type: uint
231      -
232        name: recycle-cached
233        type: uint
234      -
235        name: recycle-cache-full
236        type: uint
237      -
238        name: recycle-ring
239        type: uint
240      -
241        name: recycle-ring-full
242        type: uint
243      -
244        name: recycle-released-refcnt
245        type: uint
246
247  -
248    name: napi
249    attributes:
250      -
251        name: ifindex
252        doc: ifindex of the netdevice to which NAPI instance belongs.
253        type: u32
254        checks:
255          min: 1
256      -
257        name: id
258        doc: ID of the NAPI instance.
259        type: u32
260      -
261        name: irq
262        doc: The associated interrupt vector number for the NAPI instance.
263        type: u32
264      -
265        name: pid
266        doc: PID of the napi thread, if NAPI is configured to operate in
267             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
268             softirq context), the attribute will be absent.
269        type: u32
270      -
271        name: defer-hard-irqs
272        doc: The number of consecutive empty polls before IRQ deferral ends
273             and hardware IRQs are re-enabled.
274        type: u32
275        checks:
276          max: s32-max
277      -
278        name: gro-flush-timeout
279        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
280             timer which schedules NAPI processing. Additionally, a non-zero
281             value will also prevent GRO from flushing recent super-frames at
282             the end of a NAPI cycle. This may add receive latency in exchange
283             for reducing the number of frames processed by the network stack.
284        type: uint
285      -
286        name: irq-suspend-timeout
287        doc: The timeout, in nanoseconds, of how long to suspend IRQ
288             processing, if event polling finds events.
289        type: uint
290      -
291        name: threaded
292        doc: Whether the NAPI is configured to operate in threaded polling
293             mode. If this is set to enabled then the NAPI context operates
294             in threaded polling mode. If this is set to busy-poll, then the
295             threaded polling mode also busy polls.
296        type: u32
297        enum: napi-threaded
298  -
299    name: xsk-info
300    attributes: []
301  -
302    name: queue
303    attributes:
304      -
305        name: id
306        doc: Queue index; most queue types are indexed like a C array, with
307             indexes starting at 0 and ending at queue count - 1. Queue indexes
308             are scoped to an interface and queue type.
309        type: u32
310      -
311        name: ifindex
312        doc: ifindex of the netdevice to which the queue belongs.
313        type: u32
314        checks:
315          min: 1
316      -
317        name: type
318        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
319             XDP TX queues allocated in the kernel are not linked to NAPIs and
320             thus not listed. AF_XDP queues will have more information set in
321             the xsk attribute.
322        type: u32
323        enum: queue-type
324      -
325        name: napi-id
326        doc: ID of the NAPI instance which services this queue.
327        type: u32
328      -
329        name: dmabuf
330        doc: ID of the dmabuf attached to this queue, if any.
331        type: u32
332      -
333        name: io-uring
334        doc: io_uring memory provider information.
335        type: nest
336        nested-attributes: io-uring-provider-info
337      -
338        name: xsk
339        doc: XSK information for this queue, if any.
340        type: nest
341        nested-attributes: xsk-info
342  -
343    name: qstats
344    doc: |
345      Get device statistics, scoped to a device or a queue.
346      These statistics extend (and partially duplicate) statistics available
347      in struct rtnl_link_stats64.
348      Value of the `scope` attribute determines how statistics are
349      aggregated. When aggregated for the entire device the statistics
350      represent the total number of events since last explicit reset of
351      the device (i.e. not a reconfiguration like changing queue count).
352      When reported per-queue, however, the statistics may not add
353      up to the total number of events, will only be reported for currently
354      active objects, and will likely report the number of events since last
355      reconfiguration.
356    attributes:
357      -
358        name: ifindex
359        doc: ifindex of the netdevice to which stats belong.
360        type: u32
361        checks:
362          min: 1
363      -
364        name: queue-type
365        doc: Queue type as rx, tx, for queue-id.
366        type: u32
367        enum: queue-type
368      -
369        name: queue-id
370        doc: Queue ID, if stats are scoped to a single queue instance.
371        type: u32
372      -
373        name: scope
374        doc: |
375          What object type should be used to iterate over the stats.
376        type: uint
377        enum: qstats-scope
378      -
379        name: rx-packets
380        doc: |
381          Number of wire packets successfully received and passed to the stack.
382          For drivers supporting XDP, XDP is considered the first layer
383          of the stack, so packets consumed by XDP are still counted here.
384        type: uint
385        value: 8  # reserve some attr ids in case we need more metadata later
386      -
387        name: rx-bytes
388        doc: Successfully received bytes, see `rx-packets`.
389        type: uint
390      -
391        name: tx-packets
392        doc: |
393          Number of wire packets successfully sent. Packet is considered to be
394          successfully sent once it is in device memory (usually this means
395          the device has issued a DMA completion for the packet).
396        type: uint
397      -
398        name: tx-bytes
399        doc: Successfully sent bytes, see `tx-packets`.
400        type: uint
401      -
402        name: rx-alloc-fail
403        doc: |
404          Number of times skb or buffer allocation failed on the Rx datapath.
405          Allocation failure may, or may not result in a packet drop, depending
406          on driver implementation and whether system recovers quickly.
407        type: uint
408      -
409        name: rx-hw-drops
410        doc: |
411          Number of all packets which entered the device, but never left it,
412          including but not limited to: packets dropped due to lack of buffer
413          space, processing errors, explicit or implicit policies and packet
414          filters.
415        type: uint
416      -
417        name: rx-hw-drop-overruns
418        doc: |
419          Number of packets dropped due to transient lack of resources, such as
420          buffer space, host descriptors etc.
421        type: uint
422      -
423        name: rx-csum-complete
424        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
425        type: uint
426      -
427        name: rx-csum-unnecessary
428        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
429        type: uint
430      -
431        name: rx-csum-none
432        doc: Number of packets that were not checksummed by device.
433        type: uint
434      -
435        name: rx-csum-bad
436        doc: |
437          Number of packets with bad checksum. The packets are not discarded,
438          but still delivered to the stack.
439        type: uint
440      -
441        name: rx-hw-gro-packets
442        doc: |
443          Number of packets that were coalesced from smaller packets by the
444          device. Counts only packets coalesced with the HW-GRO netdevice
445          feature, LRO-coalesced packets are not counted.
446        type: uint
447      -
448        name: rx-hw-gro-bytes
449        doc: See `rx-hw-gro-packets`.
450        type: uint
451      -
452        name: rx-hw-gro-wire-packets
453        doc: |
454          Number of packets that were coalesced to bigger packets with the
455          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
456        type: uint
457      -
458        name: rx-hw-gro-wire-bytes
459        doc: See `rx-hw-gro-wire-packets`.
460        type: uint
461      -
462        name: rx-hw-drop-ratelimits
463        doc: |
464          Number of the packets dropped by the device due to the received
465          packets bitrate exceeding the device rate limit.
466        type: uint
467      -
468        name: tx-hw-drops
469        doc: |
470          Number of packets that arrived at the device but never left it,
471          encompassing packets dropped for reasons such as processing errors, as
472          well as those affected by explicitly defined policies and packet
473          filtering criteria.
474        type: uint
475      -
476        name: tx-hw-drop-errors
477        doc: Number of packets dropped because they were invalid or malformed.
478        type: uint
479      -
480        name: tx-csum-none
481        doc: |
482          Number of packets that did not require the device to calculate the
483          checksum.
484        type: uint
485      -
486        name: tx-needs-csum
487        doc: |
488          Number of packets that required the device to calculate the checksum.
489          This counter includes the number of GSO wire packets for which device
490          calculated the L4 checksum.
491        type: uint
492      -
493        name: tx-hw-gso-packets
494        doc: |
495          Number of packets that necessitated segmentation into smaller packets
496          by the device.
497        type: uint
498      -
499        name: tx-hw-gso-bytes
500        doc: See `tx-hw-gso-packets`.
501        type: uint
502      -
503        name: tx-hw-gso-wire-packets
504        doc: |
505          Number of wire-sized packets generated by processing
506          `tx-hw-gso-packets`.
507        type: uint
508      -
509        name: tx-hw-gso-wire-bytes
510        doc: See `tx-hw-gso-wire-packets`.
511        type: uint
512      -
513        name: tx-hw-drop-ratelimits
514        doc: |
515          Number of the packets dropped by the device due to the transmit
516          packets bitrate exceeding the device rate limit.
517        type: uint
518      -
519        name: tx-stop
520        doc: |
521          Number of times driver paused accepting new tx packets
522          from the stack to this queue, because the queue was full.
523          Note that if BQL is supported and enabled on the device
524          the networking stack will avoid queuing a lot of data at once.
525        type: uint
526      -
527        name: tx-wake
528        doc: |
529          Number of times driver re-started accepting send
530          requests to this queue from the stack.
531        type: uint
532  -
533    name: queue-id
534    subset-of: queue
535    attributes:
536      -
537        name: id
538      -
539        name: type
540  -
541    name: dmabuf
542    attributes:
543      -
544        name: ifindex
545        doc: netdev ifindex to bind the dmabuf to.
546        type: u32
547        checks:
548          min: 1
549      -
550        name: queues
551        doc: receive queues to bind the dmabuf to.
552        type: nest
553        nested-attributes: queue-id
554        multi-attr: true
555      -
556        name: fd
557        doc: dmabuf file descriptor to bind.
558        type: u32
559      -
560        name: id
561        doc: ID of the dmabuf binding.
562        type: u32
563        checks:
564          min: 1
565
566operations:
567  list:
568    -
569      name: dev-get
570      doc: Get / dump information about a netdev.
571      attribute-set: dev
572      do:
573        request:
574          attributes:
575            - ifindex
576        reply: &dev-all
577          attributes:
578            - ifindex
579            - xdp-features
580            - xdp-zc-max-segs
581            - xdp-rx-metadata-features
582            - xsk-features
583      dump:
584        reply: *dev-all
585    -
586      name: dev-add-ntf
587      doc: Notification about device appearing.
588      notify: dev-get
589      mcgrp: mgmt
590    -
591      name: dev-del-ntf
592      doc: Notification about device disappearing.
593      notify: dev-get
594      mcgrp: mgmt
595    -
596      name: dev-change-ntf
597      doc: Notification about device configuration being changed.
598      notify: dev-get
599      mcgrp: mgmt
600    -
601      name: page-pool-get
602      doc: |
603        Get / dump information about Page Pools.
604        (Only Page Pools associated with a net_device can be listed.)
605      attribute-set: page-pool
606      do:
607        request:
608          attributes:
609            - id
610        reply: &pp-reply
611          attributes:
612            - id
613            - ifindex
614            - napi-id
615            - inflight
616            - inflight-mem
617            - detach-time
618            - dmabuf
619            - io-uring
620      dump:
621        reply: *pp-reply
622      config-cond: page-pool
623    -
624      name: page-pool-add-ntf
625      doc: Notification about page pool appearing.
626      notify: page-pool-get
627      mcgrp: page-pool
628      config-cond: page-pool
629    -
630      name: page-pool-del-ntf
631      doc: Notification about page pool disappearing.
632      notify: page-pool-get
633      mcgrp: page-pool
634      config-cond: page-pool
635    -
636      name: page-pool-change-ntf
637      doc: Notification about page pool configuration being changed.
638      notify: page-pool-get
639      mcgrp: page-pool
640      config-cond: page-pool
641    -
642      name: page-pool-stats-get
643      doc: Get page pool statistics.
644      attribute-set: page-pool-stats
645      do:
646        request:
647          attributes:
648            - info
649        reply: &pp-stats-reply
650          attributes:
651            - info
652            - alloc-fast
653            - alloc-slow
654            - alloc-slow-high-order
655            - alloc-empty
656            - alloc-refill
657            - alloc-waive
658            - recycle-cached
659            - recycle-cache-full
660            - recycle-ring
661            - recycle-ring-full
662            - recycle-released-refcnt
663      dump:
664        reply: *pp-stats-reply
665      config-cond: page-pool-stats
666    -
667      name: queue-get
668      doc: Get queue information from the kernel.
669           Only configured queues will be reported (as opposed to all available
670           hardware queues).
671      attribute-set: queue
672      do:
673        request:
674          attributes:
675            - ifindex
676            - type
677            - id
678        reply: &queue-get-op
679          attributes:
680            - id
681            - type
682            - napi-id
683            - ifindex
684            - dmabuf
685            - io-uring
686            - xsk
687      dump:
688        request:
689          attributes:
690            - ifindex
691        reply: *queue-get-op
692    -
693      name: napi-get
694      doc: Get information about NAPI instances configured on the system.
695      attribute-set: napi
696      do:
697        request:
698          attributes:
699            - id
700        reply: &napi-get-op
701          attributes:
702            - id
703            - ifindex
704            - irq
705            - pid
706            - defer-hard-irqs
707            - gro-flush-timeout
708            - irq-suspend-timeout
709            - threaded
710      dump:
711        request:
712          attributes:
713            - ifindex
714        reply: *napi-get-op
715    -
716      name: qstats-get
717      doc: |
718        Get / dump fine grained statistics. Which statistics are reported
719        depends on the device and the driver, and whether the driver stores
720        software counters per-queue.
721      attribute-set: qstats
722      dump:
723        request:
724          attributes:
725            - ifindex
726            - scope
727        reply:
728          attributes:
729            - ifindex
730            - queue-type
731            - queue-id
732            - rx-packets
733            - rx-bytes
734            - tx-packets
735            - tx-bytes
736            - rx-alloc-fail
737            - rx-hw-drops
738            - rx-hw-drop-overruns
739            - rx-csum-complete
740            - rx-csum-unnecessary
741            - rx-csum-none
742            - rx-csum-bad
743            - rx-hw-gro-packets
744            - rx-hw-gro-bytes
745            - rx-hw-gro-wire-packets
746            - rx-hw-gro-wire-bytes
747            - rx-hw-drop-ratelimits
748            - tx-hw-drops
749            - tx-hw-drop-errors
750            - tx-csum-none
751            - tx-needs-csum
752            - tx-hw-gso-packets
753            - tx-hw-gso-bytes
754            - tx-hw-gso-wire-packets
755            - tx-hw-gso-wire-bytes
756            - tx-hw-drop-ratelimits
757            - tx-stop
758            - tx-wake
759    -
760      name: bind-rx
761      doc: Bind dmabuf to netdev
762      attribute-set: dmabuf
763      flags: [admin-perm]
764      do:
765        request:
766          attributes:
767            - ifindex
768            - fd
769            - queues
770        reply:
771          attributes:
772            - id
773    -
774      name: napi-set
775      doc: Set configurable NAPI instance settings.
776      attribute-set: napi
777      flags: [admin-perm]
778      do:
779        request:
780          attributes:
781            - id
782            - defer-hard-irqs
783            - gro-flush-timeout
784            - irq-suspend-timeout
785            - threaded
786    -
787      name: bind-tx
788      doc: Bind dmabuf to netdev for TX
789      attribute-set: dmabuf
790      do:
791        request:
792          attributes:
793            - ifindex
794            - fd
795        reply:
796          attributes:
797            - id
798
799kernel-family:
800  headers: ["net/netdev_netlink.h"]
801  sock-priv: struct netdev_nl_sock
802
803mcast-groups:
804  list:
805    -
806      name: mgmt
807    -
808      name: page-pool
809