xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 2f435137a0484f11b47554281091ef4908f8cb31)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73  -
    # Queue direction enum. Referenced via `enum: queue-type` by the `type`
    # attribute of the `queue` set and by the `queue-type` attribute of the
    # `qstats` set.
74    name: queue-type
75    type: enum
76    entries: [ rx, tx ]
77  -
    # Flags referenced via `enum: qstats-scope` by the `scope` attribute of
    # the `qstats` set; `queue` is currently the only defined entry.
78    name: qstats-scope
79    type: flags
80    entries: [ queue ]
81
82attribute-sets:
83  -
84    name: dev
85    attributes:
86      -
87        name: ifindex
88        doc: netdev ifindex
89        type: u32
90        checks:
91          min: 1
92      -
93        name: pad
94        type: pad
95      -
96        name: xdp-features
97        doc: Bitmask of enabled xdp-features.
98        type: u64
99        enum: xdp-act
100      -
101        name: xdp-zc-max-segs
102        doc: max fragment count supported by ZC driver
103        type: u32
104        checks:
105          min: 1
106      -
107        name: xdp-rx-metadata-features
108        doc: Bitmask of supported XDP receive metadata features.
109             See Documentation/networking/xdp-rx-metadata.rst for more details.
110        type: u64
111        enum: xdp-rx-metadata
112      -
113        name: xsk-features
114        doc: Bitmask of enabled AF_XDP features.
115        type: u64
116        enum: xsk-flags
117  -
    # Attribute set with no attributes. It is the nest type of the `io-uring`
    # attribute in both the `page-pool` and `queue` sets.
    # NOTE(review): presumably the mere presence of the nest is the signal
    # that an io_uring memory provider is in use - confirm.
118    name: io-uring-provider-info
119    attributes: []
120  -
121    name: page-pool
122    attributes:
123      -
124        name: id
125        doc: Unique ID of a Page Pool instance.
126        type: uint
127        checks:
128          min: 1
129          max: u32-max
130      -
131        name: ifindex
132        doc: |
133          ifindex of the netdev to which the pool belongs.
134          May be reported as 0 if the page pool was allocated for a netdev
135          which got destroyed already (page pools may outlast their netdevs
136          because they wait for all memory to be returned).
137        type: u32
        # NOTE(review): the doc above says 0 may be reported, while `min: 1`
        # is set here. Presumably `checks` only constrain values received in
        # requests, not values the kernel reports - confirm.
138        checks:
139          min: 1
140          max: s32-max
141      -
142        name: napi-id
143        doc: Id of NAPI using this Page Pool instance.
144        type: uint
145        checks:
146          min: 1
147          max: u32-max
148      -
149        name: inflight
150        type: uint
151        doc: |
152          Number of outstanding references to this page pool (allocated
153          but yet to be freed pages). Allocated pages may be held in
154          socket receive queues, driver receive ring, page pool recycling
155          ring, the page pool cache, etc.
156      -
157        name: inflight-mem
158        type: uint
159        doc: |
160          Amount of memory held by inflight pages.
161      -
162        name: detach-time
163        type: uint
164        doc: |
165          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
166          the driver. Once detached Page Pool can no longer be used to
167          allocate memory.
168          Page Pools wait for all the memory allocated from them to be freed
169          before truly disappearing. "Detached" Page Pools cannot be
170          "re-attached", they are just waiting to disappear.
171          Attribute is absent if Page Pool has not been detached, and
172          can still be used to allocate new memory.
173      -
174        name: dmabuf
175        doc: ID of the dmabuf this page-pool is attached to.
176        type: u32
177      -
178        name: io-uring
179        doc: io_uring memory provider information.
180        type: nest
181        nested-attributes: io-uring-provider-info
182  -
    # Subset of `page-pool` carrying only `id` and `ifindex`. Used as the
    # nest type of the `info` attribute in the `page-pool-stats` set.
183    name: page-pool-info
184    subset-of: page-pool
185    attributes:
186      -
187        name: id
188      -
189        name: ifindex
190  -
191    name: page-pool-stats
192    doc: |
193      Page pool statistics, see docs for struct page_pool_stats
194      for information about individual statistics.
195    attributes:
196      -
197        name: info
198        doc: Page pool identifying information.
199        type: nest
200        nested-attributes: page-pool-info
201      -
202        name: alloc-fast
203        type: uint
204        value: 8 # reserve some attr ids in case we need more metadata later
205      -
206        name: alloc-slow
207        type: uint
208      -
209        name: alloc-slow-high-order
210        type: uint
211      -
212        name: alloc-empty
213        type: uint
214      -
215        name: alloc-refill
216        type: uint
217      -
218        name: alloc-waive
219        type: uint
220      -
221        name: recycle-cached
222        type: uint
223      -
224        name: recycle-cache-full
225        type: uint
226      -
227        name: recycle-ring
228        type: uint
229      -
230        name: recycle-ring-full
231        type: uint
232      -
233        name: recycle-released-refcnt
234        type: uint
235
236  -
237    name: napi
238    attributes:
239      -
240        name: ifindex
241        doc: ifindex of the netdevice to which NAPI instance belongs.
242        type: u32
243        checks:
244          min: 1
245      -
246        name: id
247        doc: ID of the NAPI instance.
248        type: u32
249      -
250        name: irq
251        doc: The associated interrupt vector number for the NAPI instance.
252        type: u32
253      -
254        name: pid
255        doc: PID of the napi thread, if NAPI is configured to operate in
256             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
257             softirq context), the attribute will be absent.
258        type: u32
259      -
260        name: defer-hard-irqs
261        doc: The number of consecutive empty polls before IRQ deferral ends
262             and hardware IRQs are re-enabled.
263        type: u32
264        checks:
265          max: s32-max
266      -
267        name: gro-flush-timeout
268        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
269             timer which schedules NAPI processing. Additionally, a non-zero
270             value will also prevent GRO from flushing recent super-frames at
271             the end of a NAPI cycle. This may add receive latency in exchange
272             for reducing the number of frames processed by the network stack.
273        type: uint
274      -
275        name: irq-suspend-timeout
276        doc: The timeout, in nanoseconds, of how long to suspend IRQ
277             processing, if event polling finds events.
278        type: uint
279  -
280    name: queue
281    attributes:
282      -
283        name: id
284        doc: Queue index; most queue types are indexed like a C array, with
285             indexes starting at 0 and ending at queue count - 1. Queue indexes
286             are scoped to an interface and queue type.
287        type: u32
288      -
289        name: ifindex
290        doc: ifindex of the netdevice to which the queue belongs.
291        type: u32
292        checks:
293          min: 1
294      -
295        name: type
296        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
297        type: u32
298        enum: queue-type
299      -
300        name: napi-id
301        doc: ID of the NAPI instance which services this queue.
302        type: u32
303      -
304        name: dmabuf
305        doc: ID of the dmabuf attached to this queue, if any.
306        type: u32
307      -
308        name: io-uring
309        doc: io_uring memory provider information.
310        type: nest
311        nested-attributes: io-uring-provider-info
312
313  -
314    name: qstats
315    doc: |
316      Get device statistics, scoped to a device or a queue.
317      These statistics extend (and partially duplicate) statistics available
318      in struct rtnl_link_stats64.
319      Value of the `scope` attribute determines how statistics are
320      aggregated. When aggregated for the entire device the statistics
321      represent the total number of events since last explicit reset of
322      the device (i.e. not a reconfiguration like changing queue count).
323      When reported per-queue, however, the statistics may not add
324      up to the total number of events, will only be reported for currently
325      active objects, and will likely report the number of events since last
326      reconfiguration.
327    attributes:
328      -
329        name: ifindex
330        doc: ifindex of the netdevice to which stats belong.
331        type: u32
332        checks:
333          min: 1
334      -
335        name: queue-type
336        doc: Queue type as rx, tx, for queue-id.
337        type: u32
338        enum: queue-type
339      -
340        name: queue-id
341        doc: Queue ID, if stats are scoped to a single queue instance.
342        type: u32
343      -
344        name: scope
345        doc: |
346          What object type should be used to iterate over the stats.
347        type: uint
348        enum: qstats-scope
349      -
350        name: rx-packets
351        doc: |
352          Number of wire packets successfully received and passed to the stack.
353          For drivers supporting XDP, XDP is considered the first layer
354          of the stack, so packets consumed by XDP are still counted here.
355        type: uint
356        value: 8 # reserve some attr ids in case we need more metadata later
357      -
358        name: rx-bytes
359        doc: Successfully received bytes, see `rx-packets`.
360        type: uint
361      -
362        name: tx-packets
363        doc: |
364          Number of wire packets successfully sent. Packet is considered to be
365          successfully sent once it is in device memory (usually this means
366          the device has issued a DMA completion for the packet).
367        type: uint
368      -
369        name: tx-bytes
370        doc: Successfully sent bytes, see `tx-packets`.
371        type: uint
372      -
373        name: rx-alloc-fail
374        doc: |
375          Number of times skb or buffer allocation failed on the Rx datapath.
376          Allocation failure may, or may not result in a packet drop, depending
377          on driver implementation and whether system recovers quickly.
378        type: uint
379      -
380        name: rx-hw-drops
381        doc: |
382          Number of all packets which entered the device, but never left it,
383          including but not limited to: packets dropped due to lack of buffer
384          space, processing errors, explicit or implicit policies and packet
385          filters.
386        type: uint
387      -
388        name: rx-hw-drop-overruns
389        doc: |
390          Number of packets dropped due to transient lack of resources, such as
391          buffer space, host descriptors etc.
392        type: uint
393      -
394        name: rx-csum-complete
395        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
396        type: uint
397      -
398        name: rx-csum-unnecessary
399        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
400        type: uint
401      -
402        name: rx-csum-none
403        doc: Number of packets that were not checksummed by device.
404        type: uint
405      -
406        name: rx-csum-bad
407        doc: |
408          Number of packets with bad checksum. The packets are not discarded,
409          but still delivered to the stack.
410        type: uint
411      -
412        name: rx-hw-gro-packets
413        doc: |
414          Number of packets that were coalesced from smaller packets by the device.
415          Counts only packets coalesced with the HW-GRO netdevice feature,
416          LRO-coalesced packets are not counted.
417        type: uint
418      -
419        name: rx-hw-gro-bytes
420        doc: See `rx-hw-gro-packets`.
421        type: uint
422      -
423        name: rx-hw-gro-wire-packets
424        doc: |
425          Number of packets that were coalesced to bigger packets with the HW-GRO
426          netdevice feature. LRO-coalesced packets are not counted.
427        type: uint
428      -
429        name: rx-hw-gro-wire-bytes
430        doc: See `rx-hw-gro-wire-packets`.
431        type: uint
432      -
433        name: rx-hw-drop-ratelimits
434        doc: |
435          Number of the packets dropped by the device due to the received
436          packets bitrate exceeding the device rate limit.
437        type: uint
438      -
439        name: tx-hw-drops
440        doc: |
441          Number of packets that arrived at the device but never left it,
442          encompassing packets dropped for reasons such as processing errors, as
443          well as those affected by explicitly defined policies and packet
444          filtering criteria.
445        type: uint
446      -
447        name: tx-hw-drop-errors
448        doc: Number of packets dropped because they were invalid or malformed.
449        type: uint
450      -
451        name: tx-csum-none
452        doc: |
453          Number of packets that did not require the device to calculate the
454          checksum.
455        type: uint
456      -
457        name: tx-needs-csum
458        doc: |
459          Number of packets that required the device to calculate the checksum.
460        type: uint
461      -
462        name: tx-hw-gso-packets
463        doc: |
464          Number of packets that necessitated segmentation into smaller packets
465          by the device.
466        type: uint
467      -
468        name: tx-hw-gso-bytes
469        doc: See `tx-hw-gso-packets`.
470        type: uint
471      -
472        name: tx-hw-gso-wire-packets
473        doc: |
474          Number of wire-sized packets generated by processing
475          `tx-hw-gso-packets`.
476        type: uint
477      -
478        name: tx-hw-gso-wire-bytes
479        doc: See `tx-hw-gso-wire-packets`.
480        type: uint
481      -
482        name: tx-hw-drop-ratelimits
483        doc: |
484          Number of the packets dropped by the device due to the transmit
485          packets bitrate exceeding the device rate limit.
486        type: uint
487      -
488        name: tx-stop
489        doc: |
490          Number of times driver paused accepting new tx packets
491          from the stack to this queue, because the queue was full.
492          Note that if BQL is supported and enabled on the device
493          the networking stack will avoid queuing a lot of data at once.
494        type: uint
495      -
496        name: tx-wake
497        doc: |
498          Number of times driver re-started accepting send
499          requests to this queue from the stack.
500        type: uint
501  -
    # Subset of `queue` carrying only `id` and `type`. Used as the nest type
    # of the multi-attr `queues` attribute in the `dmabuf` set.
502    name: queue-id
503    subset-of: queue
504    attributes:
505      -
506        name: id
507      -
508        name: type
509  -
510    name: dmabuf
511    attributes:
512      -
513        name: ifindex
514        doc: netdev ifindex to bind the dmabuf to.
515        type: u32
516        checks:
517          min: 1
518      -
519        name: queues
520        doc: receive queues to bind the dmabuf to.
521        type: nest
522        nested-attributes: queue-id
523        multi-attr: true
524      -
525        name: fd
526        doc: dmabuf file descriptor to bind.
527        type: u32
528      -
529        name: id
530        doc: id of the dmabuf binding
531        type: u32
532        checks:
533          min: 1
534
535operations:
536  list:
537    -
538      name: dev-get
539      doc: Get / dump information about a netdev.
540      attribute-set: dev
541      do:
542        request:
543          attributes:
544            - ifindex
545        reply: &dev-all
546          attributes:
547            - ifindex
548            - xdp-features
549            - xdp-zc-max-segs
550            - xdp-rx-metadata-features
551            - xsk-features
552      dump:
553        reply: *dev-all
554    -
555      name: dev-add-ntf
556      doc: Notification about device appearing.
557      notify: dev-get
558      mcgrp: mgmt
559    -
560      name: dev-del-ntf
561      doc: Notification about device disappearing.
562      notify: dev-get
563      mcgrp: mgmt
564    -
565      name: dev-change-ntf
566      doc: Notification about device configuration being changed.
567      notify: dev-get
568      mcgrp: mgmt
569    -
570      name: page-pool-get
571      doc: |
572        Get / dump information about Page Pools.
573        (Only Page Pools associated with a net_device can be listed.)
574      attribute-set: page-pool
575      do:
576        request:
577          attributes:
578            - id
579        reply: &pp-reply
580          attributes:
581            - id
582            - ifindex
583            - napi-id
584            - inflight
585            - inflight-mem
586            - detach-time
587            - dmabuf
588            - io-uring
589      dump:
590        reply: *pp-reply
591      config-cond: page-pool
592    -
593      name: page-pool-add-ntf
594      doc: Notification about page pool appearing.
595      notify: page-pool-get
596      mcgrp: page-pool
597      config-cond: page-pool
598    -
599      name: page-pool-del-ntf
600      doc: Notification about page pool disappearing.
601      notify: page-pool-get
602      mcgrp: page-pool
603      config-cond: page-pool
604    -
605      name: page-pool-change-ntf
606      doc: Notification about page pool configuration being changed.
607      notify: page-pool-get
608      mcgrp: page-pool
609      config-cond: page-pool
610    -
611      name: page-pool-stats-get
612      doc: Get page pool statistics.
613      attribute-set: page-pool-stats
614      do:
615        request:
616          attributes:
617            - info
618        reply: &pp-stats-reply
619          attributes:
620            - info
621            - alloc-fast
622            - alloc-slow
623            - alloc-slow-high-order
624            - alloc-empty
625            - alloc-refill
626            - alloc-waive
627            - recycle-cached
628            - recycle-cache-full
629            - recycle-ring
630            - recycle-ring-full
631            - recycle-released-refcnt
632      dump:
633        reply: *pp-stats-reply
634      config-cond: page-pool-stats
635    -
636      name: queue-get
637      doc: Get queue information from the kernel.
638           Only configured queues will be reported (as opposed to all available
639           hardware queues).
640      attribute-set: queue
641      do:
642        request:
643          attributes:
644            - ifindex
645            - type
646            - id
647        reply: &queue-get-op
648          attributes:
649            - id
650            - type
651            - napi-id
652            - ifindex
653            - dmabuf
654            - io-uring
655      dump:
656        request:
657          attributes:
658            - ifindex
659        reply: *queue-get-op
660    -
661      name: napi-get
662      doc: Get information about NAPI instances configured on the system.
663      attribute-set: napi
664      do:
665        request:
666          attributes:
667            - id
668        reply: &napi-get-op
669          attributes:
670            - id
671            - ifindex
672            - irq
673            - pid
674            - defer-hard-irqs
675            - gro-flush-timeout
676            - irq-suspend-timeout
677      dump:
678        request:
679          attributes:
680            - ifindex
681        reply: *napi-get-op
682    -
683      name: qstats-get
684      doc: |
685        Get / dump fine grained statistics. Which statistics are reported
686        depends on the device and the driver, and whether the driver stores
687        software counters per-queue.
688      attribute-set: qstats
689      dump:
690        request:
691          attributes:
692            - ifindex
693            - scope
694        reply:
695          attributes:
696            - ifindex
697            - queue-type
698            - queue-id
699            - rx-packets
700            - rx-bytes
701            - tx-packets
702            - tx-bytes
703    -
704      name: bind-rx
705      doc: Bind dmabuf to netdev
706      attribute-set: dmabuf
707      flags: [ admin-perm ]
708      do:
709        request:
710          attributes:
711            - ifindex
712            - fd
713            - queues
714        reply:
715          attributes:
716            - id
717    -
718      name: napi-set
719      doc: Set configurable NAPI instance settings.
720      attribute-set: napi
721      flags: [ admin-perm ]
722      do:
723        request:
724          attributes:
725            - id
726            - defer-hard-irqs
727            - gro-flush-timeout
728            - irq-suspend-timeout
729
730kernel-family:
  # NOTE(review): presumably `headers` lists the includes needed to make the
  # `sock-priv` type (struct list_head) visible to generated kernel code -
  # confirm against the netlink spec documentation.
731  headers: [ "linux/list.h" ]
732  sock-priv: struct list_head
733
734mcast-groups:
735  list:
736    -
      # Group named by the dev-add-ntf, dev-del-ntf and dev-change-ntf
      # notifications (`mcgrp: mgmt`).
737      name: mgmt
738    -
      # Group named by the page-pool-add-ntf, page-pool-del-ntf and
      # page-pool-change-ntf notifications (`mcgrp: page-pool`).
739      name: page-pool
740