xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc:
71          HW timestamping of egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc:
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc:
79          Launch time HW offload is supported by the driver.
80  -
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88  -
89    name: napi-threaded
90    type: enum
91    entries: [disabled, enabled]
92
93attribute-sets:
94  -
95    name: dev
96    attributes:
97      -
98        name: ifindex
99        doc: netdev ifindex
100        type: u32
101        checks:
102          min: 1
103      -
104        name: pad
105        type: pad
106      -
107        name: xdp-features
108        doc: Bitmask of enabled xdp-features.
109        type: u64
110        enum: xdp-act
111      -
112        name: xdp-zc-max-segs
113        doc: max fragment count supported by ZC driver
114        type: u32
115        checks:
116          min: 1
117      -
118        name: xdp-rx-metadata-features
119        doc: Bitmask of supported XDP receive metadata features.
120             See Documentation/networking/xdp-rx-metadata.rst for more details.
121        type: u64
122        enum: xdp-rx-metadata
123      -
124        name: xsk-features
125        doc: Bitmask of enabled AF_XDP features.
126        type: u64
127        enum: xsk-flags
128  -
129    name: io-uring-provider-info
130    attributes: []
131  -
132    name: page-pool
133    attributes:
134      -
135        name: id
136        doc: Unique ID of a Page Pool instance.
137        type: uint
138        checks:
139          min: 1
140          max: u32-max
141      -
142        name: ifindex
143        doc: |
144          ifindex of the netdev to which the pool belongs.
145          May be reported as 0 if the page pool was allocated for a netdev
146          which got destroyed already (page pools may outlast their netdevs
147          because they wait for all memory to be returned).
148        type: u32
149        checks:
150          min: 1
151          max: s32-max
152      -
153        name: napi-id
154        doc: Id of NAPI using this Page Pool instance.
155        type: uint
156        checks:
157          min: 1
158          max: u32-max
159      -
160        name: inflight
161        type: uint
162        doc: |
163          Number of outstanding references to this page pool (allocated
164          but yet to be freed pages). Allocated pages may be held in
165          socket receive queues, driver receive ring, page pool recycling
166          ring, the page pool cache, etc.
167      -
168        name: inflight-mem
169        type: uint
170        doc: |
171          Amount of memory held by inflight pages.
172      -
173        name: detach-time
174        type: uint
175        doc: |
176          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
177          the driver. Once detached Page Pool can no longer be used to
178          allocate memory.
179          Page Pools wait for all the memory allocated from them to be freed
180          before truly disappearing. "Detached" Page Pools cannot be
181          "re-attached", they are just waiting to disappear.
182          Attribute is absent if Page Pool has not been detached, and
183          can still be used to allocate new memory.
184      -
185        name: dmabuf
186        doc: ID of the dmabuf this page-pool is attached to.
187        type: u32
188      -
189        name: io-uring
190        doc: io_uring memory provider information.
191        type: nest
192        nested-attributes: io-uring-provider-info
193  -
194    name: page-pool-info
195    subset-of: page-pool
196    attributes:
197      -
198        name: id
199      -
200        name: ifindex
201  -
202    name: page-pool-stats
203    doc: |
204      Page pool statistics, see docs for struct page_pool_stats
205      for information about individual statistics.
206    attributes:
207      -
208        name: info
209        doc: Page pool identifying information.
210        type: nest
211        nested-attributes: page-pool-info
212      -
213        name: alloc-fast
214        type: uint
215        value: 8  # reserve some attr ids in case we need more metadata later
216      -
217        name: alloc-slow
218        type: uint
219      -
220        name: alloc-slow-high-order
221        type: uint
222      -
223        name: alloc-empty
224        type: uint
225      -
226        name: alloc-refill
227        type: uint
228      -
229        name: alloc-waive
230        type: uint
231      -
232        name: recycle-cached
233        type: uint
234      -
235        name: recycle-cache-full
236        type: uint
237      -
238        name: recycle-ring
239        type: uint
240      -
241        name: recycle-ring-full
242        type: uint
243      -
244        name: recycle-released-refcnt
245        type: uint
246
247  -
248    name: napi
249    attributes:
250      -
251        name: ifindex
252        doc: ifindex of the netdevice to which NAPI instance belongs.
253        type: u32
254        checks:
255          min: 1
256      -
257        name: id
258        doc: ID of the NAPI instance.
259        type: u32
260      -
261        name: irq
262        doc: The associated interrupt vector number for the napi
263        type: u32
264      -
265        name: pid
266        doc: PID of the napi thread, if NAPI is configured to operate in
267             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
268             softirq context), the attribute will be absent.
269        type: u32
270      -
271        name: defer-hard-irqs
272        doc: The number of consecutive empty polls before IRQ deferral ends
273             and hardware IRQs are re-enabled.
274        type: u32
275        checks:
276          max: s32-max
277      -
278        name: gro-flush-timeout
279        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
280             timer which schedules NAPI processing. Additionally, a non-zero
281             value will also prevent GRO from flushing recent super-frames at
282             the end of a NAPI cycle. This may add receive latency in exchange
283             for reducing the number of frames processed by the network stack.
284        type: uint
285      -
286        name: irq-suspend-timeout
287        doc: The timeout, in nanoseconds, of how long to suspend irq
288             processing, if event polling finds events
289        type: uint
290      -
291        name: threaded
292        doc: Whether the NAPI is configured to operate in threaded polling
293             mode. If this is set to enabled then the NAPI context operates
294             in threaded polling mode.
295        type: u32
296        enum: napi-threaded
297  -
298    name: xsk-info
299    attributes: []
300  -
301    name: queue
302    attributes:
303      -
304        name: id
305        doc: Queue index; most queue types are indexed like a C array, with
306             indexes starting at 0 and ending at queue count - 1. Queue indexes
307             are scoped to an interface and queue type.
308        type: u32
309      -
310        name: ifindex
311        doc: ifindex of the netdevice to which the queue belongs.
312        type: u32
313        checks:
314          min: 1
315      -
316        name: type
317        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
318             XDP TX queues allocated in the kernel are not linked to NAPIs and
319             thus not listed. AF_XDP queues will have more information set in
320             the xsk attribute.
321        type: u32
322        enum: queue-type
323      -
324        name: napi-id
325        doc: ID of the NAPI instance which services this queue.
326        type: u32
327      -
328        name: dmabuf
329        doc: ID of the dmabuf attached to this queue, if any.
330        type: u32
331      -
332        name: io-uring
333        doc: io_uring memory provider information.
334        type: nest
335        nested-attributes: io-uring-provider-info
336      -
337        name: xsk
338        doc: XSK information for this queue, if any.
339        type: nest
340        nested-attributes: xsk-info
341  -
342    name: qstats
343    doc: |
344      Get device statistics, scoped to a device or a queue.
345      These statistics extend (and partially duplicate) statistics available
346      in struct rtnl_link_stats64.
347      Value of the `scope` attribute determines how statistics are
348      aggregated. When aggregated for the entire device the statistics
349      represent the total number of events since last explicit reset of
350      the device (i.e. not a reconfiguration like changing queue count).
351      When reported per-queue, however, the statistics may not add
352      up to the total number of events, will only be reported for currently
353      active objects, and will likely report the number of events since last
354      reconfiguration.
355    attributes:
356      -
357        name: ifindex
358        doc: ifindex of the netdevice to which stats belong.
359        type: u32
360        checks:
361          min: 1
362      -
363        name: queue-type
364        doc: Queue type as rx, tx, for queue-id.
365        type: u32
366        enum: queue-type
367      -
368        name: queue-id
369        doc: Queue ID, if stats are scoped to a single queue instance.
370        type: u32
371      -
372        name: scope
373        doc: |
374          What object type should be used to iterate over the stats.
375        type: uint
376        enum: qstats-scope
377      -
378        name: rx-packets
379        doc: |
380          Number of wire packets successfully received and passed to the stack.
381          For drivers supporting XDP, XDP is considered the first layer
382          of the stack, so packets consumed by XDP are still counted here.
383        type: uint
384        value: 8  # reserve some attr ids in case we need more metadata later
385      -
386        name: rx-bytes
387        doc: Successfully received bytes, see `rx-packets`.
388        type: uint
389      -
390        name: tx-packets
391        doc: |
392          Number of wire packets successfully sent. Packet is considered to be
393          successfully sent once it is in device memory (usually this means
394          the device has issued a DMA completion for the packet).
395        type: uint
396      -
397        name: tx-bytes
398        doc: Successfully sent bytes, see `tx-packets`.
399        type: uint
400      -
401        name: rx-alloc-fail
402        doc: |
403          Number of times skb or buffer allocation failed on the Rx datapath.
404          Allocation failure may, or may not result in a packet drop, depending
405          on driver implementation and whether system recovers quickly.
406        type: uint
407      -
408        name: rx-hw-drops
409        doc: |
410          Number of all packets which entered the device, but never left it,
411          including but not limited to: packets dropped due to lack of buffer
412          space, processing errors, explicit or implicit policies and packet
413          filters.
414        type: uint
415      -
416        name: rx-hw-drop-overruns
417        doc: |
418          Number of packets dropped due to transient lack of resources, such as
419          buffer space, host descriptors etc.
420        type: uint
421      -
422        name: rx-csum-complete
423        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
424        type: uint
425      -
426        name: rx-csum-unnecessary
427        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
428        type: uint
429      -
430        name: rx-csum-none
431        doc: Number of packets that were not checksummed by device.
432        type: uint
433      -
434        name: rx-csum-bad
435        doc: |
436          Number of packets with bad checksum. The packets are not discarded,
437          but still delivered to the stack.
438        type: uint
439      -
440        name: rx-hw-gro-packets
441        doc: |
442          Number of packets that were coalesced from smaller packets by the
443          device. Counts only packets coalesced with the HW-GRO netdevice
444          feature, LRO-coalesced packets are not counted.
445        type: uint
446      -
447        name: rx-hw-gro-bytes
448        doc: See `rx-hw-gro-packets`.
449        type: uint
450      -
451        name: rx-hw-gro-wire-packets
452        doc: |
453          Number of packets that were coalesced to bigger packets with the
454          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
455        type: uint
456      -
457        name: rx-hw-gro-wire-bytes
458        doc: See `rx-hw-gro-wire-packets`.
459        type: uint
460      -
461        name: rx-hw-drop-ratelimits
462        doc: |
463          Number of the packets dropped by the device due to the received
464          packets bitrate exceeding the device rate limit.
465        type: uint
466      -
467        name: tx-hw-drops
468        doc: |
469          Number of packets that arrived at the device but never left it,
470          encompassing packets dropped for reasons such as processing errors, as
471          well as those affected by explicitly defined policies and packet
472          filtering criteria.
473        type: uint
474      -
475        name: tx-hw-drop-errors
476        doc: Number of packets dropped because they were invalid or malformed.
477        type: uint
478      -
479        name: tx-csum-none
480        doc: |
481          Number of packets that did not require the device to calculate the
482          checksum.
483        type: uint
484      -
485        name: tx-needs-csum
486        doc: |
487          Number of packets that required the device to calculate the checksum.
488          This counter includes the number of GSO wire packets for which device
489          calculated the L4 checksum.
490        type: uint
491      -
492        name: tx-hw-gso-packets
493        doc: |
494          Number of packets that necessitated segmentation into smaller packets
495          by the device.
496        type: uint
497      -
498        name: tx-hw-gso-bytes
499        doc: See `tx-hw-gso-packets`.
500        type: uint
501      -
502        name: tx-hw-gso-wire-packets
503        doc: |
504          Number of wire-sized packets generated by processing
505          `tx-hw-gso-packets`
506        type: uint
507      -
508        name: tx-hw-gso-wire-bytes
509        doc: See `tx-hw-gso-wire-packets`.
510        type: uint
511      -
512        name: tx-hw-drop-ratelimits
513        doc: |
514          Number of the packets dropped by the device due to the transmit
515          packets bitrate exceeding the device rate limit.
516        type: uint
517      -
518        name: tx-stop
519        doc: |
520          Number of times driver paused accepting new tx packets
521          from the stack to this queue, because the queue was full.
522          Note that if BQL is supported and enabled on the device
523          the networking stack will avoid queuing a lot of data at once.
524        type: uint
525      -
526        name: tx-wake
527        doc: |
528          Number of times driver re-started accepting send
529          requests to this queue from the stack.
530        type: uint
531  -
532    name: queue-id
533    subset-of: queue
534    attributes:
535      -
536        name: id
537      -
538        name: type
539  -
540    name: dmabuf
541    attributes:
542      -
543        name: ifindex
544        doc: netdev ifindex to bind the dmabuf to.
545        type: u32
546        checks:
547          min: 1
548      -
549        name: queues
550        doc: receive queues to bind the dmabuf to.
551        type: nest
552        nested-attributes: queue-id
553        multi-attr: true
554      -
555        name: fd
556        doc: dmabuf file descriptor to bind.
557        type: u32
558      -
559        name: id
560        doc: id of the dmabuf binding
561        type: u32
562        checks:
563          min: 1
564
565operations:
566  list:
567    -
568      name: dev-get
569      doc: Get / dump information about a netdev.
570      attribute-set: dev
571      do:
572        request:
573          attributes:
574            - ifindex
575        reply: &dev-all
576          attributes:
577            - ifindex
578            - xdp-features
579            - xdp-zc-max-segs
580            - xdp-rx-metadata-features
581            - xsk-features
582      dump:
583        reply: *dev-all
584    -
585      name: dev-add-ntf
586      doc: Notification about device appearing.
587      notify: dev-get
588      mcgrp: mgmt
589    -
590      name: dev-del-ntf
591      doc: Notification about device disappearing.
592      notify: dev-get
593      mcgrp: mgmt
594    -
595      name: dev-change-ntf
596      doc: Notification about device configuration being changed.
597      notify: dev-get
598      mcgrp: mgmt
599    -
600      name: page-pool-get
601      doc: |
602        Get / dump information about Page Pools.
603        (Only Page Pools associated with a net_device can be listed.)
604      attribute-set: page-pool
605      do:
606        request:
607          attributes:
608            - id
609        reply: &pp-reply
610          attributes:
611            - id
612            - ifindex
613            - napi-id
614            - inflight
615            - inflight-mem
616            - detach-time
617            - dmabuf
618            - io-uring
619      dump:
620        reply: *pp-reply
621      config-cond: page-pool
622    -
623      name: page-pool-add-ntf
624      doc: Notification about page pool appearing.
625      notify: page-pool-get
626      mcgrp: page-pool
627      config-cond: page-pool
628    -
629      name: page-pool-del-ntf
630      doc: Notification about page pool disappearing.
631      notify: page-pool-get
632      mcgrp: page-pool
633      config-cond: page-pool
634    -
635      name: page-pool-change-ntf
636      doc: Notification about page pool configuration being changed.
637      notify: page-pool-get
638      mcgrp: page-pool
639      config-cond: page-pool
640    -
641      name: page-pool-stats-get
642      doc: Get page pool statistics.
643      attribute-set: page-pool-stats
644      do:
645        request:
646          attributes:
647            - info
648        reply: &pp-stats-reply
649          attributes:
650            - info
651            - alloc-fast
652            - alloc-slow
653            - alloc-slow-high-order
654            - alloc-empty
655            - alloc-refill
656            - alloc-waive
657            - recycle-cached
658            - recycle-cache-full
659            - recycle-ring
660            - recycle-ring-full
661            - recycle-released-refcnt
662      dump:
663        reply: *pp-stats-reply
664      config-cond: page-pool-stats
665    -
666      name: queue-get
667      doc: Get queue information from the kernel.
668           Only configured queues will be reported (as opposed to all available
669           hardware queues).
670      attribute-set: queue
671      do:
672        request:
673          attributes:
674            - ifindex
675            - type
676            - id
677        reply: &queue-get-op
678          attributes:
679            - id
680            - type
681            - napi-id
682            - ifindex
683            - dmabuf
684            - io-uring
685            - xsk
686      dump:
687        request:
688          attributes:
689            - ifindex
690        reply: *queue-get-op
691    -
692      name: napi-get
693      doc: Get information about NAPI instances configured on the system.
694      attribute-set: napi
695      do:
696        request:
697          attributes:
698            - id
699        reply: &napi-get-op
700          attributes:
701            - id
702            - ifindex
703            - irq
704            - pid
705            - defer-hard-irqs
706            - gro-flush-timeout
707            - irq-suspend-timeout
708            - threaded
709      dump:
710        request:
711          attributes:
712            - ifindex
713        reply: *napi-get-op
714    -
715      name: qstats-get
716      doc: |
717        Get / dump fine grained statistics. Which statistics are reported
718        depends on the device and the driver, and whether the driver stores
719        software counters per-queue.
720      attribute-set: qstats
721      dump:
722        request:
723          attributes:
724            - ifindex
725            - scope
726        reply:
727          attributes:
728            - ifindex
729            - queue-type
730            - queue-id
731            - rx-packets
732            - rx-bytes
733            - tx-packets
734            - tx-bytes
735    -
736      name: bind-rx
737      doc: Bind dmabuf to netdev
738      attribute-set: dmabuf
739      flags: [admin-perm]
740      do:
741        request:
742          attributes:
743            - ifindex
744            - fd
745            - queues
746        reply:
747          attributes:
748            - id
749    -
750      name: napi-set
751      doc: Set configurable NAPI instance settings.
752      attribute-set: napi
753      flags: [admin-perm]
754      do:
755        request:
756          attributes:
757            - id
758            - defer-hard-irqs
759            - gro-flush-timeout
760            - irq-suspend-timeout
761            - threaded
762    -
763      name: bind-tx
764      doc: Bind dmabuf to netdev for TX
765      attribute-set: dmabuf
766      do:
767        request:
768          attributes:
769            - ifindex
770            - fd
771        reply:
772          attributes:
773            - id
774
775kernel-family:
776  headers: ["net/netdev_netlink.h"]
777  sock-priv: struct netdev_nl_sock
778
779mcast-groups:
780  list:
781    -
782      name: mgmt
783    -
784      name: page-pool
785