xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 5b36bef444432b75e7285e33338eb8bad53fe152)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc:
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc:
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc:
79          Launch time HW offload is supported by the driver.
80  -
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88
89attribute-sets:
90  -
91    name: dev
92    attributes:
93      -
94        name: ifindex
95        doc: netdev ifindex
96        type: u32
97        checks:
98          min: 1
99      -
100        name: pad
101        type: pad
102      -
103        name: xdp-features
104        doc: Bitmask of enabled xdp-features.
105        type: u64
106        enum: xdp-act
107      -
108        name: xdp-zc-max-segs
109        doc: max fragment count supported by ZC driver
110        type: u32
111        checks:
112          min: 1
113      -
114        name: xdp-rx-metadata-features
115        doc: Bitmask of supported XDP receive metadata features.
116             See Documentation/networking/xdp-rx-metadata.rst for more details.
117        type: u64
118        enum: xdp-rx-metadata
119      -
120        name: xsk-features
121        doc: Bitmask of enabled AF_XDP features.
122        type: u64
123        enum: xsk-flags
124  -
125    name: io-uring-provider-info
126    attributes: []
127  -
128    name: page-pool
129    attributes:
130      -
131        name: id
132        doc: Unique ID of a Page Pool instance.
133        type: uint
134        checks:
135          min: 1
136          max: u32-max
137      -
138        name: ifindex
139        doc: |
140          ifindex of the netdev to which the pool belongs.
141          May be reported as 0 if the page pool was allocated for a netdev
142          which got destroyed already (page pools may outlast their netdevs
143          because they wait for all memory to be returned).
144        type: u32
145        checks:
146          min: 1
147          max: s32-max
148      -
149        name: napi-id
150        doc: Id of NAPI using this Page Pool instance.
151        type: uint
152        checks:
153          min: 1
154          max: u32-max
155      -
156        name: inflight
157        type: uint
158        doc: |
159          Number of outstanding references to this page pool (allocated
160          but yet to be freed pages). Allocated pages may be held in
161          socket receive queues, driver receive ring, page pool recycling
162          ring, the page pool cache, etc.
163      -
164        name: inflight-mem
165        type: uint
166        doc: |
167          Amount of memory held by inflight pages.
168      -
169        name: detach-time
170        type: uint
171        doc: |
172          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
173          the driver. Once detached Page Pool can no longer be used to
174          allocate memory.
175          Page Pools wait for all the memory allocated from them to be freed
176          before truly disappearing. "Detached" Page Pools cannot be
177          "re-attached", they are just waiting to disappear.
178          Attribute is absent if Page Pool has not been detached, and
179          can still be used to allocate new memory.
180      -
181        name: dmabuf
182        doc: ID of the dmabuf this page-pool is attached to.
183        type: u32
184      -
185        name: io-uring
186        doc: io-uring memory provider information.
187        type: nest
188        nested-attributes: io-uring-provider-info
189  -
190    name: page-pool-info
191    subset-of: page-pool
192    attributes:
193      -
194        name: id
195      -
196        name: ifindex
197  -
198    name: page-pool-stats
199    doc: |
200      Page pool statistics, see docs for struct page_pool_stats
201      for information about individual statistics.
202    attributes:
203      -
204        name: info
205        doc: Page pool identifying information.
206        type: nest
207        nested-attributes: page-pool-info
208      -
209        name: alloc-fast
210        type: uint
211        value: 8  # reserve some attr ids in case we need more metadata later
212      -
213        name: alloc-slow
214        type: uint
215      -
216        name: alloc-slow-high-order
217        type: uint
218      -
219        name: alloc-empty
220        type: uint
221      -
222        name: alloc-refill
223        type: uint
224      -
225        name: alloc-waive
226        type: uint
227      -
228        name: recycle-cached
229        type: uint
230      -
231        name: recycle-cache-full
232        type: uint
233      -
234        name: recycle-ring
235        type: uint
236      -
237        name: recycle-ring-full
238        type: uint
239      -
240        name: recycle-released-refcnt
241        type: uint
242
243  -
244    name: napi
245    attributes:
246      -
247        name: ifindex
248        doc: ifindex of the netdevice to which NAPI instance belongs.
249        type: u32
250        checks:
251          min: 1
252      -
253        name: id
254        doc: ID of the NAPI instance.
255        type: u32
256      -
257        name: irq
258        doc: The associated interrupt vector number for the NAPI instance.
259        type: u32
260      -
261        name: pid
262        doc: PID of the NAPI thread, if NAPI is configured to operate in
263             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
264             softirq context), the attribute will be absent.
265        type: u32
266      -
267        name: defer-hard-irqs
268        doc: The number of consecutive empty polls before IRQ deferral ends
269             and hardware IRQs are re-enabled.
270        type: u32
271        checks:
272          max: s32-max
273      -
274        name: gro-flush-timeout
275        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
276             timer which schedules NAPI processing. Additionally, a non-zero
277             value will also prevent GRO from flushing recent super-frames at
278             the end of a NAPI cycle. This may add receive latency in exchange
279             for reducing the number of frames processed by the network stack.
280        type: uint
281      -
282        name: irq-suspend-timeout
283        doc: The timeout, in nanoseconds, of how long to suspend IRQ
284             processing, if event polling finds events.
285        type: uint
286      -
287        name: threaded
288        doc: Whether the NAPI is configured to operate in threaded polling
289             mode. If this is set to 1 then the NAPI context operates in
290             threaded polling mode.
291        type: uint
292        checks:
293          max: 1
294  -
295    name: xsk-info
296    attributes: []
297  -
298    name: queue
299    attributes:
300      -
301        name: id
302        doc: Queue index; most queue types are indexed like a C array, with
303             indexes starting at 0 and ending at queue count - 1. Queue indexes
304             are scoped to an interface and queue type.
305        type: u32
306      -
307        name: ifindex
308        doc: ifindex of the netdevice to which the queue belongs.
309        type: u32
310        checks:
311          min: 1
312      -
313        name: type
314        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
315             XDP TX queues allocated in the kernel are not linked to NAPIs and
316             thus not listed. AF_XDP queues will have more information set in
317             the xsk attribute.
318        type: u32
319        enum: queue-type
320      -
321        name: napi-id
322        doc: ID of the NAPI instance which services this queue.
323        type: u32
324      -
325        name: dmabuf
326        doc: ID of the dmabuf attached to this queue, if any.
327        type: u32
328      -
329        name: io-uring
330        doc: io_uring memory provider information.
331        type: nest
332        nested-attributes: io-uring-provider-info
333      -
334        name: xsk
335        doc: XSK information for this queue, if any.
336        type: nest
337        nested-attributes: xsk-info
338  -
339    name: qstats
340    doc: |
341      Get device statistics, scoped to a device or a queue.
342      These statistics extend (and partially duplicate) statistics available
343      in struct rtnl_link_stats64.
344      Value of the `scope` attribute determines how statistics are
345      aggregated. When aggregated for the entire device the statistics
346      represent the total number of events since last explicit reset of
347      the device (i.e. not a reconfiguration like changing queue count).
348      When reported per-queue, however, the statistics may not add
349      up to the total number of events, will only be reported for currently
350      active objects, and will likely report the number of events since last
351      reconfiguration.
352    attributes:
353      -
354        name: ifindex
355        doc: ifindex of the netdevice to which stats belong.
356        type: u32
357        checks:
358          min: 1
359      -
360        name: queue-type
361        doc: Queue type as rx, tx, for queue-id.
362        type: u32
363        enum: queue-type
364      -
365        name: queue-id
366        doc: Queue ID, if stats are scoped to a single queue instance.
367        type: u32
368      -
369        name: scope
370        doc: |
371          What object type should be used to iterate over the stats.
372        type: uint
373        enum: qstats-scope
374      -
375        name: rx-packets
376        doc: |
377          Number of wire packets successfully received and passed to the stack.
378          For drivers supporting XDP, XDP is considered the first layer
379          of the stack, so packets consumed by XDP are still counted here.
380        type: uint
381        value: 8  # reserve some attr ids in case we need more metadata later
382      -
383        name: rx-bytes
384        doc: Successfully received bytes, see `rx-packets`.
385        type: uint
386      -
387        name: tx-packets
388        doc: |
389          Number of wire packets successfully sent. Packet is considered to be
390          successfully sent once it is in device memory (usually this means
391          the device has issued a DMA completion for the packet).
392        type: uint
393      -
394        name: tx-bytes
395        doc: Successfully sent bytes, see `tx-packets`.
396        type: uint
397      -
398        name: rx-alloc-fail
399        doc: |
400          Number of times skb or buffer allocation failed on the Rx datapath.
401          Allocation failure may, or may not result in a packet drop, depending
402          on driver implementation and whether system recovers quickly.
403        type: uint
404      -
405        name: rx-hw-drops
406        doc: |
407          Number of all packets which entered the device, but never left it,
408          including but not limited to: packets dropped due to lack of buffer
409          space, processing errors, explicit or implicit policies and packet
410          filters.
411        type: uint
412      -
413        name: rx-hw-drop-overruns
414        doc: |
415          Number of packets dropped due to transient lack of resources, such as
416          buffer space, host descriptors etc.
417        type: uint
418      -
419        name: rx-csum-complete
420        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
421        type: uint
422      -
423        name: rx-csum-unnecessary
424        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
425        type: uint
426      -
427        name: rx-csum-none
428        doc: Number of packets that were not checksummed by device.
429        type: uint
430      -
431        name: rx-csum-bad
432        doc: |
433          Number of packets with bad checksum. The packets are not discarded,
434          but still delivered to the stack.
435        type: uint
436      -
437        name: rx-hw-gro-packets
438        doc: |
439          Number of packets that were coalesced from smaller packets by the
440          device. Counts only packets coalesced with the HW-GRO netdevice
441          feature, LRO-coalesced packets are not counted.
442        type: uint
443      -
444        name: rx-hw-gro-bytes
445        doc: See `rx-hw-gro-packets`.
446        type: uint
447      -
448        name: rx-hw-gro-wire-packets
449        doc: |
450          Number of packets that were coalesced to bigger packets with the
451          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
452        type: uint
453      -
454        name: rx-hw-gro-wire-bytes
455        doc: See `rx-hw-gro-wire-packets`.
456        type: uint
457      -
458        name: rx-hw-drop-ratelimits
459        doc: |
460          Number of the packets dropped by the device due to the received
461          packets bitrate exceeding the device rate limit.
462        type: uint
463      -
464        name: tx-hw-drops
465        doc: |
466          Number of packets that arrived at the device but never left it,
467          encompassing packets dropped for reasons such as processing errors, as
468          well as those affected by explicitly defined policies and packet
469          filtering criteria.
470        type: uint
471      -
472        name: tx-hw-drop-errors
473        doc: Number of packets dropped because they were invalid or malformed.
474        type: uint
475      -
476        name: tx-csum-none
477        doc: |
478          Number of packets that did not require the device to calculate the
479          checksum.
480        type: uint
481      -
482        name: tx-needs-csum
483        doc: |
484          Number of packets that required the device to calculate the checksum.
485          This counter includes the number of GSO wire packets for which device
486          calculated the L4 checksum.
487        type: uint
488      -
489        name: tx-hw-gso-packets
490        doc: |
491          Number of packets that necessitated segmentation into smaller packets
492          by the device.
493        type: uint
494      -
495        name: tx-hw-gso-bytes
496        doc: See `tx-hw-gso-packets`.
497        type: uint
498      -
499        name: tx-hw-gso-wire-packets
500        doc: |
501          Number of wire-sized packets generated by processing
502          `tx-hw-gso-packets`.
503        type: uint
504      -
505        name: tx-hw-gso-wire-bytes
506        doc: See `tx-hw-gso-wire-packets`.
507        type: uint
508      -
509        name: tx-hw-drop-ratelimits
510        doc: |
511          Number of the packets dropped by the device due to the transmit
512          packets bitrate exceeding the device rate limit.
513        type: uint
514      -
515        name: tx-stop
516        doc: |
517          Number of times driver paused accepting new tx packets
518          from the stack to this queue, because the queue was full.
519          Note that if BQL is supported and enabled on the device
520          the networking stack will avoid queuing a lot of data at once.
521        type: uint
522      -
523        name: tx-wake
524        doc: |
525          Number of times driver re-started accepting send
526          requests to this queue from the stack.
527        type: uint
528  -
529    name: queue-id
530    subset-of: queue
531    attributes:
532      -
533        name: id
534      -
535        name: type
536  -
537    name: dmabuf
538    attributes:
539      -
540        name: ifindex
541        doc: netdev ifindex to bind the dmabuf to.
542        type: u32
543        checks:
544          min: 1
545      -
546        name: queues
547        doc: receive queues to bind the dmabuf to.
548        type: nest
549        nested-attributes: queue-id
550        multi-attr: true
551      -
552        name: fd
553        doc: dmabuf file descriptor to bind.
554        type: u32
555      -
556        name: id
557        doc: id of the dmabuf binding
558        type: u32
559        checks:
560          min: 1
561
562operations:
563  list:
564    -
565      name: dev-get
566      doc: Get / dump information about a netdev.
567      attribute-set: dev
568      do:
569        request:
570          attributes:
571            - ifindex
572        reply: &dev-all
573          attributes:
574            - ifindex
575            - xdp-features
576            - xdp-zc-max-segs
577            - xdp-rx-metadata-features
578            - xsk-features
579      dump:
580        reply: *dev-all
581    -
582      name: dev-add-ntf
583      doc: Notification about device appearing.
584      notify: dev-get
585      mcgrp: mgmt
586    -
587      name: dev-del-ntf
588      doc: Notification about device disappearing.
589      notify: dev-get
590      mcgrp: mgmt
591    -
592      name: dev-change-ntf
593      doc: Notification about device configuration being changed.
594      notify: dev-get
595      mcgrp: mgmt
596    -
597      name: page-pool-get
598      doc: |
599        Get / dump information about Page Pools.
600        (Only Page Pools associated with a net_device can be listed.)
601      attribute-set: page-pool
602      do:
603        request:
604          attributes:
605            - id
606        reply: &pp-reply
607          attributes:
608            - id
609            - ifindex
610            - napi-id
611            - inflight
612            - inflight-mem
613            - detach-time
614            - dmabuf
615            - io-uring
616      dump:
617        reply: *pp-reply
618      config-cond: page-pool
619    -
620      name: page-pool-add-ntf
621      doc: Notification about page pool appearing.
622      notify: page-pool-get
623      mcgrp: page-pool
624      config-cond: page-pool
625    -
626      name: page-pool-del-ntf
627      doc: Notification about page pool disappearing.
628      notify: page-pool-get
629      mcgrp: page-pool
630      config-cond: page-pool
631    -
632      name: page-pool-change-ntf
633      doc: Notification about page pool configuration being changed.
634      notify: page-pool-get
635      mcgrp: page-pool
636      config-cond: page-pool
637    -
638      name: page-pool-stats-get
639      doc: Get page pool statistics.
640      attribute-set: page-pool-stats
641      do:
642        request:
643          attributes:
644            - info
645        reply: &pp-stats-reply
646          attributes:
647            - info
648            - alloc-fast
649            - alloc-slow
650            - alloc-slow-high-order
651            - alloc-empty
652            - alloc-refill
653            - alloc-waive
654            - recycle-cached
655            - recycle-cache-full
656            - recycle-ring
657            - recycle-ring-full
658            - recycle-released-refcnt
659      dump:
660        reply: *pp-stats-reply
661      config-cond: page-pool-stats
662    -
663      name: queue-get
664      doc: Get queue information from the kernel.
665           Only configured queues will be reported (as opposed to all available
666           hardware queues).
667      attribute-set: queue
668      do:
669        request:
670          attributes:
671            - ifindex
672            - type
673            - id
674        reply: &queue-get-op
675          attributes:
676            - id
677            - type
678            - napi-id
679            - ifindex
680            - dmabuf
681            - io-uring
682            - xsk
683      dump:
684        request:
685          attributes:
686            - ifindex
687        reply: *queue-get-op
688    -
689      name: napi-get
690      doc: Get information about NAPI instances configured on the system.
691      attribute-set: napi
692      do:
693        request:
694          attributes:
695            - id
696        reply: &napi-get-op
697          attributes:
698            - id
699            - ifindex
700            - irq
701            - pid
702            - defer-hard-irqs
703            - gro-flush-timeout
704            - irq-suspend-timeout
705            - threaded
706      dump:
707        request:
708          attributes:
709            - ifindex
710        reply: *napi-get-op
711    -
712      name: qstats-get
713      doc: |
714        Get / dump fine grained statistics. Which statistics are reported
715        depends on the device and the driver, and whether the driver stores
716        software counters per-queue.
717      attribute-set: qstats
718      dump:
719        request:
720          attributes:
721            - ifindex
722            - scope
723        reply:
724          attributes:
725            - ifindex
726            - queue-type
727            - queue-id
728            - rx-packets
729            - rx-bytes
730            - tx-packets
731            - tx-bytes
732    -
733      name: bind-rx
734      doc: Bind dmabuf to netdev
735      attribute-set: dmabuf
736      flags: [admin-perm]
737      do:
738        request:
739          attributes:
740            - ifindex
741            - fd
742            - queues
743        reply:
744          attributes:
745            - id
746    -
747      name: napi-set
748      doc: Set configurable NAPI instance settings.
749      attribute-set: napi
750      flags: [admin-perm]
751      do:
752        request:
753          attributes:
754            - id
755            - defer-hard-irqs
756            - gro-flush-timeout
757            - irq-suspend-timeout
758            - threaded
759    -
760      name: bind-tx
761      doc: Bind dmabuf to netdev for TX
762      attribute-set: dmabuf
763      do:
764        request:
765          attributes:
766            - ifindex
767            - fd
768        reply:
769          attributes:
770            - id
771
772kernel-family:
773  headers: ["net/netdev_netlink.h"]
774  sock-priv: struct netdev_nl_sock
775
776mcast-groups:
777  list:
778    -
779      name: mgmt
780    -
781      name: page-pool
782