xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 3fd6c59042dbba50391e30862beac979491145fe)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
    # XDP capability flags; the dev attribute `xdp-features` uses this set
    # as its enum.
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
    # XDP receive-metadata capability flags; enum of the dev attribute
    # `xdp-rx-metadata-features`. Each entry's doc names the bpf_xdp_metadata_*
    # helper through which the value is exposed.
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
    # AF_XDP transmit capability flags; enum of the dev attribute
    # `xsk-features`.
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73  -
    # Queue kinds; enum of `type` in the queue attribute set and of
    # `queue-type` in the qstats attribute set.
74    name: queue-type
75    type: enum
76    entries: [ rx, tx ]
77  -
    # Single-entry flag set; enum of the qstats `scope` attribute.
78    name: qstats-scope
79    type: flags
80    entries: [ queue ]
81
82attribute-sets:
83  -
    # Per-netdev attributes. The dev-get reply lists every attribute here
    # except `pad`.
84    name: dev
85    attributes:
86      -
87        name: ifindex
88        doc: netdev ifindex
89        type: u32
90        checks:
91          min: 1
92      -
        # NOTE(review): presumably alignment padding for the u64 attributes
        # that follow - confirm.
93        name: pad
94        type: pad
95      -
96        name: xdp-features
97        doc: Bitmask of enabled xdp-features.
98        type: u64
99        enum: xdp-act
100      -
101        name: xdp-zc-max-segs
102        doc: max fragment count supported by ZC driver
103        type: u32
104        checks:
105          min: 1
106      -
107        name: xdp-rx-metadata-features
108        doc: Bitmask of supported XDP receive metadata features.
109             See Documentation/networking/xdp-rx-metadata.rst for more details.
110        type: u64
111        enum: xdp-rx-metadata
112      -
113        name: xsk-features
114        doc: Bitmask of enabled AF_XDP features.
115        type: u64
116        enum: xsk-flags
117  -
    # Attributes describing one Page Pool instance; all of them are listed in
    # the page-pool-get reply.
118    name: page-pool
119    attributes:
120      -
121        name: id
122        doc: Unique ID of a Page Pool instance.
123        type: uint
124        checks:
125          min: 1
126          max: u32-max
127      -
128        name: ifindex
129        doc: |
130          ifindex of the netdev to which the pool belongs.
131          May be reported as 0 if the page pool was allocated for a netdev
132          which got destroyed already (page pools may outlast their netdevs
133          because they wait for all memory to be returned).
134        type: u32
        # NOTE(review): the doc above allows 0 to be reported while `min` is 1;
        # presumably the checks only constrain request input - confirm.
135        checks:
136          min: 1
137          max: s32-max
138      -
139        name: napi-id
140        doc: Id of NAPI using this Page Pool instance.
141        type: uint
142        checks:
143          min: 1
144          max: u32-max
145      -
146        name: inflight
147        type: uint
148        doc: |
149          Number of outstanding references to this page pool (allocated
150          but yet to be freed pages). Allocated pages may be held in
151          socket receive queues, driver receive ring, page pool recycling
152          ring, the page pool cache, etc.
153      -
154        name: inflight-mem
155        type: uint
156        doc: |
157          Amount of memory held by inflight pages.
158      -
159        name: detach-time
160        type: uint
161        doc: |
162          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
163          the driver. Once detached Page Pool can no longer be used to
164          allocate memory.
165          Page Pools wait for all the memory allocated from them to be freed
166          before truly disappearing. "Detached" Page Pools cannot be
167          "re-attached", they are just waiting to disappear.
168          Attribute is absent if Page Pool has not been detached, and
169          can still be used to allocate new memory.
170      -
171        name: dmabuf
172        doc: ID of the dmabuf this page-pool is attached to.
173        type: u32
174  -
    # Subset of page-pool limited to `id` and `ifindex`; nested under the
    # `info` attribute of page-pool-stats.
175    name: page-pool-info
176    subset-of: page-pool
177    attributes:
178      -
179        name: id
180      -
181        name: ifindex
182  -
    # One identifying `info` nest followed by uint counters. The first counter
    # (alloc-fast) is pinned to value 8, leaving the ids in between unused.
183    name: page-pool-stats
184    doc: |
185      Page pool statistics, see docs for struct page_pool_stats
186      for information about individual statistics.
187    attributes:
188      -
189        name: info
190        doc: Page pool identifying information.
191        type: nest
192        nested-attributes: page-pool-info
193      -
194        name: alloc-fast
195        type: uint
196        value: 8 # reserve some attr ids in case we need more metadata later
197      -
198        name: alloc-slow
199        type: uint
200      -
201        name: alloc-slow-high-order
202        type: uint
203      -
204        name: alloc-empty
205        type: uint
206      -
207        name: alloc-refill
208        type: uint
209      -
210        name: alloc-waive
211        type: uint
212      -
213        name: recycle-cached
214        type: uint
215      -
216        name: recycle-cache-full
217        type: uint
218      -
219        name: recycle-ring
220        type: uint
221      -
222        name: recycle-ring-full
223        type: uint
224      -
225        name: recycle-released-refcnt
226        type: uint
227
228  -
    # Attributes of a NAPI instance. napi-get replies with all seven;
    # the napi-set request lists id, defer-hard-irqs, gro-flush-timeout and
    # irq-suspend-timeout.
229    name: napi
230    attributes:
231      -
232        name: ifindex
233        doc: ifindex of the netdevice to which NAPI instance belongs.
234        type: u32
235        checks:
236          min: 1
237      -
238        name: id
239        doc: ID of the NAPI instance.
240        type: u32
241      -
242        name: irq
243        doc: The associated interrupt vector number for the napi
244        type: u32
245      -
246        name: pid
247        doc: PID of the napi thread, if NAPI is configured to operate in
248             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
249             softirq context), the attribute will be absent.
250        type: u32
251      -
252        name: defer-hard-irqs
253        doc: The number of consecutive empty polls before IRQ deferral ends
254             and hardware IRQs are re-enabled.
255        type: u32
256        checks:
257          max: s32-max
258      -
259        name: gro-flush-timeout
260        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
261             timer which schedules NAPI processing. Additionally, a non-zero
262             value will also prevent GRO from flushing recent super-frames at
263             the end of a NAPI cycle. This may add receive latency in exchange
264             for reducing the number of frames processed by the network stack.
265        type: uint
266      -
267        name: irq-suspend-timeout
268        doc: The timeout, in nanoseconds, of how long to suspend irq
269             processing, if event polling finds events
270        type: uint
271  -
    # Attributes of a netdev queue. The queue-get `do` request addresses a
    # queue by ifindex, type and id.
272    name: queue
273    attributes:
274      -
275        name: id
276        doc: Queue index; most queue types are indexed like a C array, with
277             indexes starting at 0 and ending at queue count - 1. Queue indexes
278             are scoped to an interface and queue type.
279        type: u32
280      -
281        name: ifindex
282        doc: ifindex of the netdevice to which the queue belongs.
283        type: u32
284        checks:
285          min: 1
286      -
287        name: type
288        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
289        type: u32
290        enum: queue-type
291      -
292        name: napi-id
293        doc: ID of the NAPI instance which services this queue.
294        type: u32
295      -
296        name: dmabuf
297        doc: ID of the dmabuf attached to this queue, if any.
298        type: u32
299
300  -
    # Statistics attribute set used by qstats-get. The identifying attributes
    # (ifindex, queue-type, queue-id, scope) come first; the counters begin at
    # rx-packets, which is pinned to value 8.
301    name: qstats
302    doc: |
303      Get device statistics, scoped to a device or a queue.
304      These statistics extend (and partially duplicate) statistics available
305      in struct rtnl_link_stats64.
306      Value of the `scope` attribute determines how statistics are
307      aggregated. When aggregated for the entire device the statistics
308      represent the total number of events since last explicit reset of
309      the device (i.e. not a reconfiguration like changing queue count).
310      When reported per-queue, however, the statistics may not add
311      up to the total number of events, will only be reported for currently
312      active objects, and will likely report the number of events since last
313      reconfiguration.
314    attributes:
315      -
316        name: ifindex
317        doc: ifindex of the netdevice to which stats belong.
318        type: u32
319        checks:
320          min: 1
321      -
322        name: queue-type
323        doc: Queue type as rx, tx, for queue-id.
324        type: u32
325        enum: queue-type
326      -
327        name: queue-id
328        doc: Queue ID, if stats are scoped to a single queue instance.
329        type: u32
330      -
331        name: scope
332        doc: |
333          What object type should be used to iterate over the stats.
334        type: uint
335        enum: qstats-scope
336      -
337        name: rx-packets
338        doc: |
339          Number of wire packets successfully received and passed to the stack.
340          For drivers supporting XDP, XDP is considered the first layer
341          of the stack, so packets consumed by XDP are still counted here.
342        type: uint
343        value: 8 # reserve some attr ids in case we need more metadata later
344      -
345        name: rx-bytes
346        doc: Successfully received bytes, see `rx-packets`.
347        type: uint
348      -
349        name: tx-packets
350        doc: |
351          Number of wire packets successfully sent. Packet is considered to be
352          successfully sent once it is in device memory (usually this means
353          the device has issued a DMA completion for the packet).
354        type: uint
355      -
356        name: tx-bytes
357        doc: Successfully sent bytes, see `tx-packets`.
358        type: uint
359      -
360        name: rx-alloc-fail
361        doc: |
362          Number of times skb or buffer allocation failed on the Rx datapath.
363          Allocation failure may, or may not result in a packet drop, depending
364          on driver implementation and whether system recovers quickly.
365        type: uint
366      -
367        name: rx-hw-drops
368        doc: |
369          Number of all packets which entered the device, but never left it,
370          including but not limited to: packets dropped due to lack of buffer
371          space, processing errors, explicit or implicit policies and packet
372          filters.
373        type: uint
374      -
375        name: rx-hw-drop-overruns
376        doc: |
377          Number of packets dropped due to transient lack of resources, such as
378          buffer space, host descriptors etc.
379        type: uint
380      -
381        name: rx-csum-complete
382        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
383        type: uint
384      -
385        name: rx-csum-unnecessary
386        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
387        type: uint
388      -
389        name: rx-csum-none
390        doc: Number of packets that were not checksummed by device.
391        type: uint
392      -
393        name: rx-csum-bad
394        doc: |
395          Number of packets with bad checksum. The packets are not discarded,
396          but still delivered to the stack.
397        type: uint
398      -
399        name: rx-hw-gro-packets
400        doc: |
401          Number of packets that were coalesced from smaller packets by the device.
402          Counts only packets coalesced with the HW-GRO netdevice feature,
403          LRO-coalesced packets are not counted.
404        type: uint
405      -
406        name: rx-hw-gro-bytes
407        doc: See `rx-hw-gro-packets`.
408        type: uint
409      -
410        name: rx-hw-gro-wire-packets
411        doc: |
412          Number of packets that were coalesced to bigger packets with the HW-GRO
413          netdevice feature. LRO-coalesced packets are not counted.
414        type: uint
415      -
416        name: rx-hw-gro-wire-bytes
417        doc: See `rx-hw-gro-wire-packets`.
418        type: uint
419      -
420        name: rx-hw-drop-ratelimits
421        doc: |
422          Number of the packets dropped by the device due to the received
423          packets bitrate exceeding the device rate limit.
424        type: uint
425      -
426        name: tx-hw-drops
427        doc: |
428          Number of packets that arrived at the device but never left it,
429          encompassing packets dropped for reasons such as processing errors, as
430          well as those affected by explicitly defined policies and packet
431          filtering criteria.
432        type: uint
433      -
434        name: tx-hw-drop-errors
435        doc: Number of packets dropped because they were invalid or malformed.
436        type: uint
437      -
438        name: tx-csum-none
439        doc: |
440          Number of packets that did not require the device to calculate the
441          checksum.
442        type: uint
443      -
444        name: tx-needs-csum
445        doc: |
446          Number of packets that required the device to calculate the checksum.
447        type: uint
448      -
449        name: tx-hw-gso-packets
450        doc: |
451          Number of packets that necessitated segmentation into smaller packets
452          by the device.
453        type: uint
454      -
455        name: tx-hw-gso-bytes
456        doc: See `tx-hw-gso-packets`.
457        type: uint
458      -
459        name: tx-hw-gso-wire-packets
460        doc: |
461          Number of wire-sized packets generated by processing
462          `tx-hw-gso-packets`
463        type: uint
464      -
465        name: tx-hw-gso-wire-bytes
466        doc: See `tx-hw-gso-wire-packets`.
467        type: uint
468      -
469        name: tx-hw-drop-ratelimits
470        doc: |
471          Number of the packets dropped by the device due to the transmit
472          packets bitrate exceeding the device rate limit.
473        type: uint
474      -
475        name: tx-stop
476        doc: |
477          Number of times driver paused accepting new tx packets
478          from the stack to this queue, because the queue was full.
479          Note that if BQL is supported and enabled on the device
480          the networking stack will avoid queuing a lot of data at once.
481        type: uint
482      -
483        name: tx-wake
484        doc: |
485          Number of times driver re-started accepting send
486          requests to this queue from the stack.
487        type: uint
488  -
    # Subset of queue limited to `id` and `type`; nested under the `queues`
    # attribute of the dmabuf set.
489    name: queue-id
490    subset-of: queue
491    attributes:
492      -
493        name: id
494      -
495        name: type
496  -
    # Attributes of the bind-rx operation: the request lists ifindex, fd and
    # queues; the reply lists id.
497    name: dmabuf
498    attributes:
499      -
500        name: ifindex
501        doc: netdev ifindex to bind the dmabuf to.
502        type: u32
503        checks:
504          min: 1
505      -
        # multi-attr: the nest may be repeated, one queue-id (id + type) each.
506        name: queues
507        doc: receive queues to bind the dmabuf to.
508        type: nest
509        nested-attributes: queue-id
510        multi-attr: true
511      -
512        name: fd
513        doc: dmabuf file descriptor to bind.
514        type: u32
515      -
516        name: id
517        doc: id of the dmabuf binding
518        type: u32
519        checks:
520          min: 1
521
522operations:
523  list:
524    -
      # The `do` request carries ifindex; `dump` declares no request
      # attributes. Both replies share the attribute list anchored as
      # &dev-all.
525      name: dev-get
526      doc: Get / dump information about a netdev.
527      attribute-set: dev
528      do:
529        request:
530          attributes:
531            - ifindex
532        reply: &dev-all
533          attributes:
534            - ifindex
535            - xdp-features
536            - xdp-zc-max-segs
537            - xdp-rx-metadata-features
538            - xsk-features
539      dump:
540        reply: *dev-all
541    -
      # The three notifications below are all tied to dev-get through `notify`
      # and are sent on the mgmt multicast group.
542      name: dev-add-ntf
543      doc: Notification about device appearing.
544      notify: dev-get
545      mcgrp: mgmt
546    -
547      name: dev-del-ntf
548      doc: Notification about device disappearing.
549      notify: dev-get
550      mcgrp: mgmt
551    -
552      name: dev-change-ntf
553      doc: Notification about device configuration being changed.
554      notify: dev-get
555      mcgrp: mgmt
556    -
      # The `do` request carries the pool id; `dump` declares no request
      # attributes. Both replies share the attribute list anchored as
      # &pp-reply.
557      name: page-pool-get
558      doc: |
559        Get / dump information about Page Pools.
560        (Only Page Pools associated with a net_device can be listed.)
561      attribute-set: page-pool
562      do:
563        request:
564          attributes:
565            - id
566        reply: &pp-reply
567          attributes:
568            - id
569            - ifindex
570            - napi-id
571            - inflight
572            - inflight-mem
573            - detach-time
574            - dmabuf
575      dump:
576        reply: *pp-reply
577      config-cond: page-pool
578    -
      # The three notifications below are all tied to page-pool-get through
      # `notify`, are sent on the page-pool multicast group, and carry the same
      # `config-cond` as page-pool-get.
579      name: page-pool-add-ntf
580      doc: Notification about page pool appearing.
581      notify: page-pool-get
582      mcgrp: page-pool
583      config-cond: page-pool
584    -
585      name: page-pool-del-ntf
586      doc: Notification about page pool disappearing.
587      notify: page-pool-get
588      mcgrp: page-pool
589      config-cond: page-pool
590    -
591      name: page-pool-change-ntf
592      doc: Notification about page pool configuration being changed.
593      notify: page-pool-get
594      mcgrp: page-pool
595      config-cond: page-pool
596    -
      # The `do` request carries the `info` nest (page-pool-info: id, ifindex).
      # Both replies share the attribute list anchored as &pp-stats-reply,
      # which lists every attribute of the page-pool-stats set.
597      name: page-pool-stats-get
598      doc: Get page pool statistics.
599      attribute-set: page-pool-stats
600      do:
601        request:
602          attributes:
603            - info
604        reply: &pp-stats-reply
605          attributes:
606            - info
607            - alloc-fast
608            - alloc-slow
609            - alloc-slow-high-order
610            - alloc-empty
611            - alloc-refill
612            - alloc-waive
613            - recycle-cached
614            - recycle-cache-full
615            - recycle-ring
616            - recycle-ring-full
617            - recycle-released-refcnt
618      dump:
619        reply: *pp-stats-reply
620      config-cond: page-pool-stats
621    -
      # The `do` request lists ifindex, type and id; the `dump` request lists
      # only ifindex. Both replies share the attribute list anchored as
      # &queue-get-op.
622      name: queue-get
623      doc: Get queue information from the kernel.
624           Only configured queues will be reported (as opposed to all available
625           hardware queues).
626      attribute-set: queue
627      do:
628        request:
629          attributes:
630            - ifindex
631            - type
632            - id
633        reply: &queue-get-op
634          attributes:
635            - id
636            - type
637            - napi-id
638            - ifindex
639            - dmabuf
640      dump:
641        request:
642          attributes:
643            - ifindex
644        reply: *queue-get-op
645    -
      # The `do` request lists the NAPI id; the `dump` request lists ifindex.
      # Both replies share the attribute list anchored as &napi-get-op.
646      name: napi-get
647      doc: Get information about NAPI instances configured on the system.
648      attribute-set: napi
649      do:
650        request:
651          attributes:
652            - id
653        reply: &napi-get-op
654          attributes:
655            - id
656            - ifindex
657            - irq
658            - pid
659            - defer-hard-irqs
660            - gro-flush-timeout
661            - irq-suspend-timeout
662      dump:
663        request:
664          attributes:
665            - ifindex
666        reply: *napi-get-op
667    -
      # Dump-only operation (no `do` is declared). The reply below lists only
      # the identifying attributes and the first four counters of the qstats
      # set, not every counter that set defines.
668      name: qstats-get
669      doc: |
670        Get / dump fine grained statistics. Which statistics are reported
671        depends on the device and the driver, and whether the driver stores
672        software counters per-queue.
673      attribute-set: qstats
674      dump:
675        request:
676          attributes:
677            - ifindex
678            - scope
679        reply:
680          attributes:
681            - ifindex
682            - queue-type
683            - queue-id
684            - rx-packets
685            - rx-bytes
686            - tx-packets
687            - tx-bytes
688    -
      # Flagged admin-perm. The request lists ifindex, fd and queues; the
      # reply lists the id of the resulting binding.
689      name: bind-rx
690      doc: Bind dmabuf to netdev
691      attribute-set: dmabuf
692      flags: [ admin-perm ]
693      do:
694        request:
695          attributes:
696            - ifindex
697            - fd
698            - queues
699        reply:
700          attributes:
701            - id
702    -
      # Flagged admin-perm. Only a `do` request is declared (no reply
      # attributes); `id` appears alongside the three settable values.
703      name: napi-set
704      doc: Set configurable NAPI instance settings.
705      attribute-set: napi
706      flags: [ admin-perm ]
707      do:
708        request:
709          attributes:
710            - id
711            - defer-hard-irqs
712            - gro-flush-timeout
713            - irq-suspend-timeout
714
715kernel-family:
  # NOTE(review): presumably linux/list.h is listed because `sock-priv` is
  # declared as struct list_head - confirm.
716  headers: [ "linux/list.h" ]
717  sock-priv: struct list_head
718
719mcast-groups:
  # Group names referenced by `mcgrp` in the notification operations:
  # mgmt by the dev-*-ntf entries, page-pool by the page-pool-*-ntf entries.
720  list:
721    -
722      name: mgmt
723    -
724      name: page-pool
725