xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73      -
74        name: tx-launch-time-fifo
75        doc:
76          Launch time HW offload is supported by the driver.
77  -
78    name: queue-type
79    type: enum
80    entries: [ rx, tx ]
81  -
82    name: qstats-scope
83    type: flags
84    entries: [ queue ]
85
86attribute-sets:
87  -
88    name: dev
89    attributes:
90      -
91        name: ifindex
92        doc: netdev ifindex
93        type: u32
94        checks:
95          min: 1
96      -
97        name: pad
98        type: pad
99      -
100        name: xdp-features
101        doc: Bitmask of enabled xdp-features.
102        type: u64
103        enum: xdp-act
104      -
105        name: xdp-zc-max-segs
106        doc: Maximum fragment count supported by the ZC (zero copy) driver.
107        type: u32
108        checks:
109          min: 1
110      -
111        name: xdp-rx-metadata-features
112        doc: Bitmask of supported XDP receive metadata features.
113             See Documentation/networking/xdp-rx-metadata.rst for more details.
114        type: u64
115        enum: xdp-rx-metadata
116      -
117        name: xsk-features
118        doc: Bitmask of enabled AF_XDP features.
119        type: u64
120        enum: xsk-flags
121  -
122    name: io-uring-provider-info
123    attributes: []
124  -
125    name: page-pool
126    attributes:
127      -
128        name: id
129        doc: Unique ID of a Page Pool instance.
130        type: uint
131        checks:
132          min: 1
133          max: u32-max
134      -
135        name: ifindex
136        doc: |
137          ifindex of the netdev to which the pool belongs.
138          May be reported as 0 if the page pool was allocated for a netdev
139          which got destroyed already (page pools may outlast their netdevs
140          because they wait for all memory to be returned).
141        type: u32
142        checks:
143          min: 1
144          max: s32-max
145      -
146        name: napi-id
147        doc: Id of NAPI using this Page Pool instance.
148        type: uint
149        checks:
150          min: 1
151          max: u32-max
152      -
153        name: inflight
154        type: uint
155        doc: |
156          Number of outstanding references to this page pool (allocated
157          but yet to be freed pages). Allocated pages may be held in
158          socket receive queues, driver receive ring, page pool recycling
159          ring, the page pool cache, etc.
160      -
161        name: inflight-mem
162        type: uint
163        doc: |
164          Amount of memory held by inflight pages.
165      -
166        name: detach-time
167        type: uint
168        doc: |
169          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
170          the driver. Once detached Page Pool can no longer be used to
171          allocate memory.
172          Page Pools wait for all the memory allocated from them to be freed
173          before truly disappearing. "Detached" Page Pools cannot be
174          "re-attached", they are just waiting to disappear.
175          Attribute is absent if Page Pool has not been detached, and
176          can still be used to allocate new memory.
177      -
178        name: dmabuf
179        doc: ID of the dmabuf this page-pool is attached to.
180        type: u32
181      -
182        name: io-uring
183        doc: io_uring memory provider information.
184        type: nest
185        nested-attributes: io-uring-provider-info
186  -
187    name: page-pool-info
188    subset-of: page-pool
189    attributes:
190      -
191        name: id
192      -
193        name: ifindex
194  -
195    name: page-pool-stats
196    doc: |
197      Page pool statistics, see docs for struct page_pool_stats
198      for information about individual statistics.
199    attributes:
200      -
201        name: info
202        doc: Page pool identifying information.
203        type: nest
204        nested-attributes: page-pool-info
205      -
206        name: alloc-fast
207        type: uint
208        value: 8 # reserve some attr ids in case we need more metadata later
209      -
210        name: alloc-slow
211        type: uint
212      -
213        name: alloc-slow-high-order
214        type: uint
215      -
216        name: alloc-empty
217        type: uint
218      -
219        name: alloc-refill
220        type: uint
221      -
222        name: alloc-waive
223        type: uint
224      -
225        name: recycle-cached
226        type: uint
227      -
228        name: recycle-cache-full
229        type: uint
230      -
231        name: recycle-ring
232        type: uint
233      -
234        name: recycle-ring-full
235        type: uint
236      -
237        name: recycle-released-refcnt
238        type: uint
239
240  -
241    name: napi
242    attributes:
243      -
244        name: ifindex
245        doc: ifindex of the netdevice to which NAPI instance belongs.
246        type: u32
247        checks:
248          min: 1
249      -
250        name: id
251        doc: ID of the NAPI instance.
252        type: u32
253      -
254        name: irq
255        doc: The associated interrupt vector number for the NAPI instance.
256        type: u32
257      -
258        name: pid
259        doc: PID of the napi thread, if NAPI is configured to operate in
260             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
261             softirq context), the attribute will be absent.
262        type: u32
263      -
264        name: defer-hard-irqs
265        doc: The number of consecutive empty polls before IRQ deferral ends
266             and hardware IRQs are re-enabled.
267        type: u32
268        checks:
269          max: s32-max
270      -
271        name: gro-flush-timeout
272        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
273             timer which schedules NAPI processing. Additionally, a non-zero
274             value will also prevent GRO from flushing recent super-frames at
275             the end of a NAPI cycle. This may add receive latency in exchange
276             for reducing the number of frames processed by the network stack.
277        type: uint
278      -
279        name: irq-suspend-timeout
280        doc: The timeout, in nanoseconds, of how long to suspend IRQ
281             processing, if event polling finds events.
282        type: uint
283  -
284    name: xsk-info
285    attributes: []
286  -
287    name: queue
288    attributes:
289      -
290        name: id
291        doc: Queue index; most queue types are indexed like a C array, with
292             indexes starting at 0 and ending at queue count - 1. Queue indexes
293             are scoped to an interface and queue type.
294        type: u32
295      -
296        name: ifindex
297        doc: ifindex of the netdevice to which the queue belongs.
298        type: u32
299        checks:
300          min: 1
301      -
302        name: type
303        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
304             XDP TX queues allocated in the kernel are not linked to NAPIs and
305             thus not listed. AF_XDP queues will have more information set in
306             the xsk attribute.
307        type: u32
308        enum: queue-type
309      -
310        name: napi-id
311        doc: ID of the NAPI instance which services this queue.
312        type: u32
313      -
314        name: dmabuf
315        doc: ID of the dmabuf attached to this queue, if any.
316        type: u32
317      -
318        name: io-uring
319        doc: io_uring memory provider information.
320        type: nest
321        nested-attributes: io-uring-provider-info
322      -
323        name: xsk
324        doc: XSK information for this queue, if any.
325        type: nest
326        nested-attributes: xsk-info
327  -
328    name: qstats
329    doc: |
330      Get device statistics, scoped to a device or a queue.
331      These statistics extend (and partially duplicate) statistics available
332      in struct rtnl_link_stats64.
333      Value of the `scope` attribute determines how statistics are
334      aggregated. When aggregated for the entire device the statistics
335      represent the total number of events since last explicit reset of
336      the device (i.e. not a reconfiguration like changing queue count).
337      When reported per-queue, however, the statistics may not add
338      up to the total number of events, will only be reported for currently
339      active objects, and will likely report the number of events since last
340      reconfiguration.
341    attributes:
342      -
343        name: ifindex
344        doc: ifindex of the netdevice to which stats belong.
345        type: u32
346        checks:
347          min: 1
348      -
349        name: queue-type
350        doc: Queue type as rx, tx, for queue-id.
351        type: u32
352        enum: queue-type
353      -
354        name: queue-id
355        doc: Queue ID, if stats are scoped to a single queue instance.
356        type: u32
357      -
358        name: scope
359        doc: |
360          What object type should be used to iterate over the stats.
361        type: uint
362        enum: qstats-scope
363      -
364        name: rx-packets
365        doc: |
366          Number of wire packets successfully received and passed to the stack.
367          For drivers supporting XDP, XDP is considered the first layer
368          of the stack, so packets consumed by XDP are still counted here.
369        type: uint
370        value: 8 # reserve some attr ids in case we need more metadata later
371      -
372        name: rx-bytes
373        doc: Successfully received bytes, see `rx-packets`.
374        type: uint
375      -
376        name: tx-packets
377        doc: |
378          Number of wire packets successfully sent. Packet is considered to be
379          successfully sent once it is in device memory (usually this means
380          the device has issued a DMA completion for the packet).
381        type: uint
382      -
383        name: tx-bytes
384        doc: Successfully sent bytes, see `tx-packets`.
385        type: uint
386      -
387        name: rx-alloc-fail
388        doc: |
389          Number of times skb or buffer allocation failed on the Rx datapath.
390          Allocation failure may, or may not result in a packet drop, depending
391          on driver implementation and whether system recovers quickly.
392        type: uint
393      -
394        name: rx-hw-drops
395        doc: |
396          Number of all packets which entered the device, but never left it,
397          including but not limited to: packets dropped due to lack of buffer
398          space, processing errors, explicit or implicit policies and packet
399          filters.
400        type: uint
401      -
402        name: rx-hw-drop-overruns
403        doc: |
404          Number of packets dropped due to transient lack of resources, such as
405          buffer space, host descriptors etc.
406        type: uint
407      -
408        name: rx-csum-complete
409        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
410        type: uint
411      -
412        name: rx-csum-unnecessary
413        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
414        type: uint
415      -
416        name: rx-csum-none
417        doc: Number of packets that were not checksummed by device.
418        type: uint
419      -
420        name: rx-csum-bad
421        doc: |
422          Number of packets with bad checksum. The packets are not discarded,
423          but still delivered to the stack.
424        type: uint
425      -
426        name: rx-hw-gro-packets
427        doc: |
428          Number of packets that were coalesced from smaller packets by the device.
429          Counts only packets coalesced with the HW-GRO netdevice feature,
430          LRO-coalesced packets are not counted.
431        type: uint
432      -
433        name: rx-hw-gro-bytes
434        doc: See `rx-hw-gro-packets`.
435        type: uint
436      -
437        name: rx-hw-gro-wire-packets
438        doc: |
439          Number of packets that were coalesced to bigger packets with the HW-GRO
440          netdevice feature. LRO-coalesced packets are not counted.
441        type: uint
442      -
443        name: rx-hw-gro-wire-bytes
444        doc: See `rx-hw-gro-wire-packets`.
445        type: uint
446      -
447        name: rx-hw-drop-ratelimits
448        doc: |
449          Number of the packets dropped by the device due to the received
450          packets bitrate exceeding the device rate limit.
451        type: uint
452      -
453        name: tx-hw-drops
454        doc: |
455          Number of packets that arrived at the device but never left it,
456          encompassing packets dropped for reasons such as processing errors, as
457          well as those affected by explicitly defined policies and packet
458          filtering criteria.
459        type: uint
460      -
461        name: tx-hw-drop-errors
462        doc: Number of packets dropped because they were invalid or malformed.
463        type: uint
464      -
465        name: tx-csum-none
466        doc: |
467          Number of packets that did not require the device to calculate the
468          checksum.
469        type: uint
470      -
471        name: tx-needs-csum
472        doc: |
473          Number of packets that required the device to calculate the checksum.
474          This counter includes the number of GSO wire packets for which device
475          calculated the L4 checksum.
476        type: uint
477      -
478        name: tx-hw-gso-packets
479        doc: |
480          Number of packets that necessitated segmentation into smaller packets
481          by the device.
482        type: uint
483      -
484        name: tx-hw-gso-bytes
485        doc: See `tx-hw-gso-packets`.
486        type: uint
487      -
488        name: tx-hw-gso-wire-packets
489        doc: |
490          Number of wire-sized packets generated by processing
491          `tx-hw-gso-packets`.
492        type: uint
493      -
494        name: tx-hw-gso-wire-bytes
495        doc: See `tx-hw-gso-wire-packets`.
496        type: uint
497      -
498        name: tx-hw-drop-ratelimits
499        doc: |
500          Number of the packets dropped by the device due to the transmit
501          packets bitrate exceeding the device rate limit.
502        type: uint
503      -
504        name: tx-stop
505        doc: |
506          Number of times driver paused accepting new tx packets
507          from the stack to this queue, because the queue was full.
508          Note that if BQL is supported and enabled on the device
509          the networking stack will avoid queuing a lot of data at once.
510        type: uint
511      -
512        name: tx-wake
513        doc: |
514          Number of times driver re-started accepting send
515          requests to this queue from the stack.
516        type: uint
517  -
518    name: queue-id
519    subset-of: queue
520    attributes:
521      -
522        name: id
523      -
524        name: type
525  -
526    name: dmabuf
527    attributes:
528      -
529        name: ifindex
530        doc: netdev ifindex to bind the dmabuf to.
531        type: u32
532        checks:
533          min: 1
534      -
535        name: queues
536        doc: receive queues to bind the dmabuf to.
537        type: nest
538        nested-attributes: queue-id
539        multi-attr: true
540      -
541        name: fd
542        doc: dmabuf file descriptor to bind.
543        type: u32
544      -
545        name: id
546        doc: ID of the dmabuf binding.
547        type: u32
548        checks:
549          min: 1
550
551operations:
552  list:
553    -
554      name: dev-get
555      doc: Get / dump information about a netdev.
556      attribute-set: dev
557      do:
558        request:
559          attributes:
560            - ifindex
561        reply: &dev-all
562          attributes:
563            - ifindex
564            - xdp-features
565            - xdp-zc-max-segs
566            - xdp-rx-metadata-features
567            - xsk-features
568      dump:
569        reply: *dev-all
570    -
571      name: dev-add-ntf
572      doc: Notification about device appearing.
573      notify: dev-get
574      mcgrp: mgmt
575    -
576      name: dev-del-ntf
577      doc: Notification about device disappearing.
578      notify: dev-get
579      mcgrp: mgmt
580    -
581      name: dev-change-ntf
582      doc: Notification about device configuration being changed.
583      notify: dev-get
584      mcgrp: mgmt
585    -
586      name: page-pool-get
587      doc: |
588        Get / dump information about Page Pools.
589        (Only Page Pools associated with a net_device can be listed.)
590      attribute-set: page-pool
591      do:
592        request:
593          attributes:
594            - id
595        reply: &pp-reply
596          attributes:
597            - id
598            - ifindex
599            - napi-id
600            - inflight
601            - inflight-mem
602            - detach-time
603            - dmabuf
604            - io-uring
605      dump:
606        reply: *pp-reply
607      config-cond: page-pool
608    -
609      name: page-pool-add-ntf
610      doc: Notification about page pool appearing.
611      notify: page-pool-get
612      mcgrp: page-pool
613      config-cond: page-pool
614    -
615      name: page-pool-del-ntf
616      doc: Notification about page pool disappearing.
617      notify: page-pool-get
618      mcgrp: page-pool
619      config-cond: page-pool
620    -
621      name: page-pool-change-ntf
622      doc: Notification about page pool configuration being changed.
623      notify: page-pool-get
624      mcgrp: page-pool
625      config-cond: page-pool
626    -
627      name: page-pool-stats-get
628      doc: Get page pool statistics.
629      attribute-set: page-pool-stats
630      do:
631        request:
632          attributes:
633            - info
634        reply: &pp-stats-reply
635          attributes:
636            - info
637            - alloc-fast
638            - alloc-slow
639            - alloc-slow-high-order
640            - alloc-empty
641            - alloc-refill
642            - alloc-waive
643            - recycle-cached
644            - recycle-cache-full
645            - recycle-ring
646            - recycle-ring-full
647            - recycle-released-refcnt
648      dump:
649        reply: *pp-stats-reply
650      config-cond: page-pool-stats
651    -
652      name: queue-get
653      doc: Get queue information from the kernel.
654           Only configured queues will be reported (as opposed to all available
655           hardware queues).
656      attribute-set: queue
657      do:
658        request:
659          attributes:
660            - ifindex
661            - type
662            - id
663        reply: &queue-get-op
664          attributes:
665            - id
666            - type
667            - napi-id
668            - ifindex
669            - dmabuf
670            - io-uring
671            - xsk
672      dump:
673        request:
674          attributes:
675            - ifindex
676        reply: *queue-get-op
677    -
678      name: napi-get
679      doc: Get information about NAPI instances configured on the system.
680      attribute-set: napi
681      do:
682        request:
683          attributes:
684            - id
685        reply: &napi-get-op
686          attributes:
687            - id
688            - ifindex
689            - irq
690            - pid
691            - defer-hard-irqs
692            - gro-flush-timeout
693            - irq-suspend-timeout
694      dump:
695        request:
696          attributes:
697            - ifindex
698        reply: *napi-get-op
699    -
700      name: qstats-get
701      doc: |
702        Get / dump fine grained statistics. Which statistics are reported
703        depends on the device and the driver, and whether the driver stores
704        software counters per-queue.
705      attribute-set: qstats
706      dump:
707        request:
708          attributes:
709            - ifindex
710            - scope
711        reply:
712          attributes:
713            - ifindex
714            - queue-type
715            - queue-id
716            - rx-packets
717            - rx-bytes
718            - tx-packets
719            - tx-bytes
720    -
721      name: bind-rx
722      doc: Bind dmabuf to netdev.
723      attribute-set: dmabuf
724      flags: [ admin-perm ]
725      do:
726        request:
727          attributes:
728            - ifindex
729            - fd
730            - queues
731        reply:
732          attributes:
733            - id
734    -
735      name: napi-set
736      doc: Set configurable NAPI instance settings.
737      attribute-set: napi
738      flags: [ admin-perm ]
739      do:
740        request:
741          attributes:
742            - id
743            - defer-hard-irqs
744            - gro-flush-timeout
745            - irq-suspend-timeout
746
747kernel-family:
748  headers: [ "net/netdev_netlink.h"]
749  sock-priv: struct netdev_nl_sock
750
751mcast-groups:
752  list:
753    -
754      name: mgmt
755    -
756      name: page-pool
757