xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 34f2573661e3e644efaf383178af634a2fd67828)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc:
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc:
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc:
79          Launch time HW offload is supported by the driver.
80  -
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88
89attribute-sets:
90  -
91    name: dev
92    attributes:
93      -
94        name: ifindex
95        doc: netdev ifindex
96        type: u32
97        checks:
98          min: 1
99      -
100        name: pad
101        type: pad
102      -
103        name: xdp-features
104        doc: Bitmask of enabled xdp-features.
105        type: u64
106        enum: xdp-act
107      -
108        name: xdp-zc-max-segs
109        doc: max fragment count supported by ZC driver
110        type: u32
111        checks:
112          min: 1
113      -
114        name: xdp-rx-metadata-features
115        doc: Bitmask of supported XDP receive metadata features.
116             See Documentation/networking/xdp-rx-metadata.rst for more details.
117        type: u64
118        enum: xdp-rx-metadata
119      -
120        name: xsk-features
121        doc: Bitmask of enabled AF_XDP features.
122        type: u64
123        enum: xsk-flags
124  -
125    name: io-uring-provider-info
126    attributes: []
127  -
128    name: page-pool
129    attributes:
130      -
131        name: id
132        doc: Unique ID of a Page Pool instance.
133        type: uint
134        checks:
135          min: 1
136          max: u32-max
137      -
138        name: ifindex
139        doc: |
140          ifindex of the netdev to which the pool belongs.
141          May be reported as 0 if the page pool was allocated for a netdev
142          which got destroyed already (page pools may outlast their netdevs
143          because they wait for all memory to be returned).
144        type: u32
145        checks:
146          min: 1
147          max: s32-max
148      -
149        name: napi-id
150        doc: Id of NAPI using this Page Pool instance.
151        type: uint
152        checks:
153          min: 1
154          max: u32-max
155      -
156        name: inflight
157        type: uint
158        doc: |
159          Number of outstanding references to this page pool (allocated
160          but yet to be freed pages). Allocated pages may be held in
161          socket receive queues, driver receive ring, page pool recycling
162          ring, the page pool cache, etc.
163      -
164        name: inflight-mem
165        type: uint
166        doc: |
167          Amount of memory held by inflight pages.
168      -
169        name: detach-time
170        type: uint
171        doc: |
172          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
173          the driver. Once detached Page Pool can no longer be used to
174          allocate memory.
175          Page Pools wait for all the memory allocated from them to be freed
176          before truly disappearing. "Detached" Page Pools cannot be
177          "re-attached", they are just waiting to disappear.
178          Attribute is absent if Page Pool has not been detached, and
179          can still be used to allocate new memory.
180      -
181        name: dmabuf
182        doc: ID of the dmabuf this page-pool is attached to.
183        type: u32
184      -
185        name: io-uring
186        doc: io_uring memory provider information.
187        type: nest
188        nested-attributes: io-uring-provider-info
189  -
190    name: page-pool-info
191    subset-of: page-pool
192    attributes:
193      -
194        name: id
195      -
196        name: ifindex
197  -
198    name: page-pool-stats
199    doc: |
200      Page pool statistics, see docs for struct page_pool_stats
201      for information about individual statistics.
202    attributes:
203      -
204        name: info
205        doc: Page pool identifying information.
206        type: nest
207        nested-attributes: page-pool-info
208      -
209        name: alloc-fast
210        type: uint
211        value: 8  # reserve some attr ids in case we need more metadata later
212      -
213        name: alloc-slow
214        type: uint
215      -
216        name: alloc-slow-high-order
217        type: uint
218      -
219        name: alloc-empty
220        type: uint
221      -
222        name: alloc-refill
223        type: uint
224      -
225        name: alloc-waive
226        type: uint
227      -
228        name: recycle-cached
229        type: uint
230      -
231        name: recycle-cache-full
232        type: uint
233      -
234        name: recycle-ring
235        type: uint
236      -
237        name: recycle-ring-full
238        type: uint
239      -
240        name: recycle-released-refcnt
241        type: uint
242
243  -
244    name: napi
245    attributes:
246      -
247        name: ifindex
248        doc: ifindex of the netdevice to which NAPI instance belongs.
249        type: u32
250        checks:
251          min: 1
252      -
253        name: id
254        doc: ID of the NAPI instance.
255        type: u32
256      -
257        name: irq
258        doc: The associated interrupt vector number for the NAPI.
259        type: u32
260      -
261        name: pid
262        doc: PID of the napi thread, if NAPI is configured to operate in
263             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
264             softirq context), the attribute will be absent.
265        type: u32
266      -
267        name: defer-hard-irqs
268        doc: The number of consecutive empty polls before IRQ deferral ends
269             and hardware IRQs are re-enabled.
270        type: u32
271        checks:
272          max: s32-max
273      -
274        name: gro-flush-timeout
275        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
276             timer which schedules NAPI processing. Additionally, a non-zero
277             value will also prevent GRO from flushing recent super-frames at
278             the end of a NAPI cycle. This may add receive latency in exchange
279             for reducing the number of frames processed by the network stack.
280        type: uint
281      -
282        name: irq-suspend-timeout
283        doc: The timeout, in nanoseconds, of how long to suspend IRQ
284             processing, if event polling finds events.
285        type: uint
286  -
287    name: xsk-info
288    attributes: []
289  -
290    name: queue
291    attributes:
292      -
293        name: id
294        doc: Queue index; most queue types are indexed like a C array, with
295             indexes starting at 0 and ending at queue count - 1. Queue indexes
296             are scoped to an interface and queue type.
297        type: u32
298      -
299        name: ifindex
300        doc: ifindex of the netdevice to which the queue belongs.
301        type: u32
302        checks:
303          min: 1
304      -
305        name: type
306        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
307             XDP TX queues allocated in the kernel are not linked to NAPIs and
308             thus not listed. AF_XDP queues will have more information set in
309             the xsk attribute.
310        type: u32
311        enum: queue-type
312      -
313        name: napi-id
314        doc: ID of the NAPI instance which services this queue.
315        type: u32
316      -
317        name: dmabuf
318        doc: ID of the dmabuf attached to this queue, if any.
319        type: u32
320      -
321        name: io-uring
322        doc: io_uring memory provider information.
323        type: nest
324        nested-attributes: io-uring-provider-info
325      -
326        name: xsk
327        doc: XSK information for this queue, if any.
328        type: nest
329        nested-attributes: xsk-info
330  -
331    name: qstats
332    doc: |
333      Get device statistics, scoped to a device or a queue.
334      These statistics extend (and partially duplicate) statistics available
335      in struct rtnl_link_stats64.
336      Value of the `scope` attribute determines how statistics are
337      aggregated. When aggregated for the entire device the statistics
338      represent the total number of events since last explicit reset of
339      the device (i.e. not a reconfiguration like changing queue count).
340      When reported per-queue, however, the statistics may not add
341      up to the total number of events, will only be reported for currently
342      active objects, and will likely report the number of events since last
343      reconfiguration.
344    attributes:
345      -
346        name: ifindex
347        doc: ifindex of the netdevice to which stats belong.
348        type: u32
349        checks:
350          min: 1
351      -
352        name: queue-type
353        doc: Queue type as rx, tx, for queue-id.
354        type: u32
355        enum: queue-type
356      -
357        name: queue-id
358        doc: Queue ID, if stats are scoped to a single queue instance.
359        type: u32
360      -
361        name: scope
362        doc: |
363          What object type should be used to iterate over the stats.
364        type: uint
365        enum: qstats-scope
366      -
367        name: rx-packets
368        doc: |
369          Number of wire packets successfully received and passed to the stack.
370          For drivers supporting XDP, XDP is considered the first layer
371          of the stack, so packets consumed by XDP are still counted here.
372        type: uint
373        value: 8  # reserve some attr ids in case we need more metadata later
374      -
375        name: rx-bytes
376        doc: Successfully received bytes, see `rx-packets`.
377        type: uint
378      -
379        name: tx-packets
380        doc: |
381          Number of wire packets successfully sent. Packet is considered to be
382          successfully sent once it is in device memory (usually this means
383          the device has issued a DMA completion for the packet).
384        type: uint
385      -
386        name: tx-bytes
387        doc: Successfully sent bytes, see `tx-packets`.
388        type: uint
389      -
390        name: rx-alloc-fail
391        doc: |
392          Number of times skb or buffer allocation failed on the Rx datapath.
393          Allocation failure may or may not result in a packet drop, depending
394          on driver implementation and whether system recovers quickly.
395        type: uint
396      -
397        name: rx-hw-drops
398        doc: |
399          Number of all packets which entered the device, but never left it,
400          including but not limited to: packets dropped due to lack of buffer
401          space, processing errors, explicit or implicit policies and packet
402          filters.
403        type: uint
404      -
405        name: rx-hw-drop-overruns
406        doc: |
407          Number of packets dropped due to transient lack of resources, such as
408          buffer space, host descriptors etc.
409        type: uint
410      -
411        name: rx-csum-complete
412        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
413        type: uint
414      -
415        name: rx-csum-unnecessary
416        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
417        type: uint
418      -
419        name: rx-csum-none
420        doc: Number of packets that were not checksummed by device.
421        type: uint
422      -
423        name: rx-csum-bad
424        doc: |
425          Number of packets with bad checksum. The packets are not discarded,
426          but still delivered to the stack.
427        type: uint
428      -
429        name: rx-hw-gro-packets
430        doc: |
431          Number of packets that were coalesced from smaller packets by the
432          device. Counts only packets coalesced with the HW-GRO netdevice
433          feature, LRO-coalesced packets are not counted.
434        type: uint
435      -
436        name: rx-hw-gro-bytes
437        doc: See `rx-hw-gro-packets`.
438        type: uint
439      -
440        name: rx-hw-gro-wire-packets
441        doc: |
442          Number of packets that were coalesced to bigger packets with the
443          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
444        type: uint
445      -
446        name: rx-hw-gro-wire-bytes
447        doc: See `rx-hw-gro-wire-packets`.
448        type: uint
449      -
450        name: rx-hw-drop-ratelimits
451        doc: |
452          Number of the packets dropped by the device due to the received
453          packets bitrate exceeding the device rate limit.
454        type: uint
455      -
456        name: tx-hw-drops
457        doc: |
458          Number of packets that arrived at the device but never left it,
459          encompassing packets dropped for reasons such as processing errors, as
460          well as those affected by explicitly defined policies and packet
461          filtering criteria.
462        type: uint
463      -
464        name: tx-hw-drop-errors
465        doc: Number of packets dropped because they were invalid or malformed.
466        type: uint
467      -
468        name: tx-csum-none
469        doc: |
470          Number of packets that did not require the device to calculate the
471          checksum.
472        type: uint
473      -
474        name: tx-needs-csum
475        doc: |
476          Number of packets that required the device to calculate the checksum.
477          This counter includes the number of GSO wire packets for which device
478          calculated the L4 checksum.
479        type: uint
480      -
481        name: tx-hw-gso-packets
482        doc: |
483          Number of packets that necessitated segmentation into smaller packets
484          by the device.
485        type: uint
486      -
487        name: tx-hw-gso-bytes
488        doc: See `tx-hw-gso-packets`.
489        type: uint
490      -
491        name: tx-hw-gso-wire-packets
492        doc: |
493          Number of wire-sized packets generated by processing
494          `tx-hw-gso-packets`.
495        type: uint
496      -
497        name: tx-hw-gso-wire-bytes
498        doc: See `tx-hw-gso-wire-packets`.
499        type: uint
500      -
501        name: tx-hw-drop-ratelimits
502        doc: |
503          Number of the packets dropped by the device due to the transmit
504          packets bitrate exceeding the device rate limit.
505        type: uint
506      -
507        name: tx-stop
508        doc: |
509          Number of times driver paused accepting new tx packets
510          from the stack to this queue, because the queue was full.
511          Note that if BQL is supported and enabled on the device
512          the networking stack will avoid queuing a lot of data at once.
513        type: uint
514      -
515        name: tx-wake
516        doc: |
517          Number of times driver re-started accepting send
518          requests to this queue from the stack.
519        type: uint
520  -
521    name: queue-id
522    subset-of: queue
523    attributes:
524      -
525        name: id
526      -
527        name: type
528  -
529    name: dmabuf
530    attributes:
531      -
532        name: ifindex
533        doc: netdev ifindex to bind the dmabuf to.
534        type: u32
535        checks:
536          min: 1
537      -
538        name: queues
539        doc: receive queues to bind the dmabuf to.
540        type: nest
541        nested-attributes: queue-id
542        multi-attr: true
543      -
544        name: fd
545        doc: dmabuf file descriptor to bind.
546        type: u32
547      -
548        name: id
549        doc: ID of the dmabuf binding.
550        type: u32
551        checks:
552          min: 1
553
554operations:
555  list:
556    -
557      name: dev-get
558      doc: Get / dump information about a netdev.
559      attribute-set: dev
560      do:
561        request:
562          attributes:
563            - ifindex
564        reply: &dev-all
565          attributes:
566            - ifindex
567            - xdp-features
568            - xdp-zc-max-segs
569            - xdp-rx-metadata-features
570            - xsk-features
571      dump:
572        reply: *dev-all
573    -
574      name: dev-add-ntf
575      doc: Notification about device appearing.
576      notify: dev-get
577      mcgrp: mgmt
578    -
579      name: dev-del-ntf
580      doc: Notification about device disappearing.
581      notify: dev-get
582      mcgrp: mgmt
583    -
584      name: dev-change-ntf
585      doc: Notification about device configuration being changed.
586      notify: dev-get
587      mcgrp: mgmt
588    -
589      name: page-pool-get
590      doc: |
591        Get / dump information about Page Pools.
592        (Only Page Pools associated with a net_device can be listed.)
593      attribute-set: page-pool
594      do:
595        request:
596          attributes:
597            - id
598        reply: &pp-reply
599          attributes:
600            - id
601            - ifindex
602            - napi-id
603            - inflight
604            - inflight-mem
605            - detach-time
606            - dmabuf
607            - io-uring
608      dump:
609        reply: *pp-reply
610      config-cond: page-pool
611    -
612      name: page-pool-add-ntf
613      doc: Notification about page pool appearing.
614      notify: page-pool-get
615      mcgrp: page-pool
616      config-cond: page-pool
617    -
618      name: page-pool-del-ntf
619      doc: Notification about page pool disappearing.
620      notify: page-pool-get
621      mcgrp: page-pool
622      config-cond: page-pool
623    -
624      name: page-pool-change-ntf
625      doc: Notification about page pool configuration being changed.
626      notify: page-pool-get
627      mcgrp: page-pool
628      config-cond: page-pool
629    -
630      name: page-pool-stats-get
631      doc: Get page pool statistics.
632      attribute-set: page-pool-stats
633      do:
634        request:
635          attributes:
636            - info
637        reply: &pp-stats-reply
638          attributes:
639            - info
640            - alloc-fast
641            - alloc-slow
642            - alloc-slow-high-order
643            - alloc-empty
644            - alloc-refill
645            - alloc-waive
646            - recycle-cached
647            - recycle-cache-full
648            - recycle-ring
649            - recycle-ring-full
650            - recycle-released-refcnt
651      dump:
652        reply: *pp-stats-reply
653      config-cond: page-pool-stats
654    -
655      name: queue-get
656      doc: Get queue information from the kernel.
657           Only configured queues will be reported (as opposed to all available
658           hardware queues).
659      attribute-set: queue
660      do:
661        request:
662          attributes:
663            - ifindex
664            - type
665            - id
666        reply: &queue-get-op
667          attributes:
668            - id
669            - type
670            - napi-id
671            - ifindex
672            - dmabuf
673            - io-uring
674            - xsk
675      dump:
676        request:
677          attributes:
678            - ifindex
679        reply: *queue-get-op
680    -
681      name: napi-get
682      doc: Get information about NAPI instances configured on the system.
683      attribute-set: napi
684      do:
685        request:
686          attributes:
687            - id
688        reply: &napi-get-op
689          attributes:
690            - id
691            - ifindex
692            - irq
693            - pid
694            - defer-hard-irqs
695            - gro-flush-timeout
696            - irq-suspend-timeout
697      dump:
698        request:
699          attributes:
700            - ifindex
701        reply: *napi-get-op
702    -
703      name: qstats-get
704      doc: |
705        Get / dump fine grained statistics. Which statistics are reported
706        depends on the device and the driver, and whether the driver stores
707        software counters per-queue.
708      attribute-set: qstats
709      dump:
710        request:
711          attributes:
712            - ifindex
713            - scope
714        reply:
715          attributes:
716            - ifindex
717            - queue-type
718            - queue-id
719            - rx-packets
720            - rx-bytes
721            - tx-packets
722            - tx-bytes
723    -
724      name: bind-rx
725      doc: Bind dmabuf to netdev
726      attribute-set: dmabuf
727      flags: [admin-perm]
728      do:
729        request:
730          attributes:
731            - ifindex
732            - fd
733            - queues
734        reply:
735          attributes:
736            - id
737    -
738      name: napi-set
739      doc: Set configurable NAPI instance settings.
740      attribute-set: napi
741      flags: [admin-perm]
742      do:
743        request:
744          attributes:
745            - id
746            - defer-hard-irqs
747            - gro-flush-timeout
748            - irq-suspend-timeout
749    -
750      name: bind-tx
751      doc: Bind dmabuf to netdev for TX
752      attribute-set: dmabuf
753      do:
754        request:
755          attributes:
756            - ifindex
757            - fd
758        reply:
759          attributes:
760            - id
761
762kernel-family:
763  headers: ["net/netdev_netlink.h"]
764  sock-priv: struct netdev_nl_sock
765
766mcast-groups:
767  list:
768    -
769      name: mgmt
770    -
771      name: page-pool
772