xref: /linux/Documentation/netlink/specs/netdev.yaml (revision f12b363887c706c40611fba645265527a8415832)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
# Family identity: the family name and its human-readable summary.
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
    # XDP capability flags; this definition is the `enum` of the
    # `xdp-features` attribute in the `dev` attribute set.
    # NOTE(review): entry order presumably determines each flag's bit
    # position (uAPI), so new entries should only be appended - confirm.
10    type: flags
11    name: xdp-act
    # Per the genetlink spec schema, render-max asks the code generator to
    # also render the max member(s) for this enum.
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
    # Flags for the receive metadata a device can expose to XDP programs;
    # this definition is the `enum` of the `xdp-rx-metadata-features`
    # attribute in the `dev` attribute set. Each entry's doc names the
    # matching bpf_xdp_metadata_rx_*() helper.
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
    # Driver feature flags for AF_XDP; this definition is the `enum` of the
    # `xsk-features` attribute in the `dev` attribute set.
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73  -
    # Queue direction enum; referenced by `type` in the `queue` attribute
    # set and by `queue-type` in the `qstats` attribute set.
74    name: queue-type
75    type: enum
76    entries: [ rx, tx ]
77  -
    # Flags for the `scope` attribute of the `qstats` attribute set, which
    # selects the object type used to iterate over stats. `queue` is the
    # only scope defined here.
78    name: qstats-scope
79    type: flags
80    entries: [ queue ]
81
82attribute-sets:
83  -
    # Attributes describing a single netdev; this is the attribute set of
    # the `dev-get` operation (and of the notifications that reuse it).
84    name: dev
85    attributes:
86      -
87        name: ifindex
88        doc: netdev ifindex
89        type: u32
90        checks:
91          min: 1
92      -
        # NOTE(review): `pad` presumably exists only to align the 64-bit
        # attributes below - confirm against the netlink spec docs.
93        name: pad
94        type: pad
95      -
        # Bitmask whose bits are the `xdp-act` flags.
96        name: xdp-features
97        doc: Bitmask of enabled xdp-features.
98        type: u64
99        enum: xdp-act
100      -
101        name: xdp-zc-max-segs
102        doc: max fragment count supported by ZC driver
103        type: u32
104        checks:
105          min: 1
106      -
        # Bitmask whose bits are the `xdp-rx-metadata` flags.
107        name: xdp-rx-metadata-features
108        doc: Bitmask of supported XDP receive metadata features.
109             See Documentation/networking/xdp-rx-metadata.rst for more details.
110        type: u64
111        enum: xdp-rx-metadata
112      -
        # Bitmask whose bits are the `xsk-flags` flags.
113        name: xsk-features
114        doc: Bitmask of enabled AF_XDP features.
115        type: u64
116        enum: xsk-flags
117  -
    # Attributes describing one Page Pool instance; this is the attribute
    # set of `page-pool-get` and the parent of the `page-pool-info` subset.
118    name: page-pool
119    attributes:
120      -
121        name: id
122        doc: Unique ID of a Page Pool instance.
123        type: uint
124        checks:
125          min: 1
126          max: u32-max
127      -
        # NOTE(review): the doc allows a reported value of 0 while `checks`
        # sets min: 1 - presumably checks only constrain values sent to the
        # kernel, not values it reports; confirm.
128        name: ifindex
129        doc: |
130          ifindex of the netdev to which the pool belongs.
131          May be reported as 0 if the page pool was allocated for a netdev
132          which got destroyed already (page pools may outlast their netdevs
133          because they wait for all memory to be returned).
134        type: u32
135        checks:
136          min: 1
137          max: s32-max
138      -
139        name: napi-id
140        doc: Id of NAPI using this Page Pool instance.
141        type: uint
142        checks:
143          min: 1
144          max: u32-max
145      -
146        name: inflight
147        type: uint
148        doc: |
149          Number of outstanding references to this page pool (allocated
150          but yet to be freed pages). Allocated pages may be held in
151          socket receive queues, driver receive ring, page pool recycling
152          ring, the page pool cache, etc.
153      -
154        name: inflight-mem
155        type: uint
156        doc: |
157          Amount of memory held by inflight pages.
158      -
        # Optional attribute: per its doc it is absent until the pool has
        # been detached.
159        name: detach-time
160        type: uint
161        doc: |
162          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
163          the driver. Once detached Page Pool can no longer be used to
164          allocate memory.
165          Page Pools wait for all the memory allocated from them to be freed
166          before truly disappearing. "Detached" Page Pools cannot be
167          "re-attached", they are just waiting to disappear.
168          Attribute is absent if Page Pool has not been detached, and
169          can still be used to allocate new memory.
170      -
171        name: dmabuf
172        doc: ID of the dmabuf this page-pool is attached to.
173        type: u32
174  -
    # Subset of `page-pool` holding only the identifying attributes; it is
    # nested as `info` inside the `page-pool-stats` attribute set.
    # Attributes are listed by name only; per the netlink spec docs their
    # definitions come from the parent set.
175    name: page-pool-info
176    subset-of: page-pool
177    attributes:
178      -
179        name: id
180      -
181        name: ifindex
182  -
    # Attribute set of `page-pool-stats-get`: one identifying nest (`info`)
    # followed by unsigned counters.
183    name: page-pool-stats
184    doc: |
185      Page pool statistics, see docs for struct page_pool_stats
186      for information about individual statistics.
187    attributes:
188      -
        # Nest of `page-pool-info` attributes identifying the pool.
189        name: info
190        doc: Page pool identifying information.
191        type: nest
192        nested-attributes: page-pool-info
193      -
        # First counter; its explicit `value: 8` leaves a gap after `info`
        # for future metadata attributes (see the inline comment).
194        name: alloc-fast
195        type: uint
196        value: 8 # reserve some attr ids in case we need more metadata later
197      -
198        name: alloc-slow
199        type: uint
200      -
201        name: alloc-slow-high-order
202        type: uint
203      -
204        name: alloc-empty
205        type: uint
206      -
207        name: alloc-refill
208        type: uint
209      -
210        name: alloc-waive
211        type: uint
212      -
213        name: recycle-cached
214        type: uint
215      -
216        name: recycle-cache-full
217        type: uint
218      -
219        name: recycle-ring
220        type: uint
221      -
222        name: recycle-ring-full
223        type: uint
224      -
225        name: recycle-released-refcnt
226        type: uint
227
228  -
    # Attributes of a NAPI instance; this is the attribute set of both
    # `napi-get` and `napi-set`. Of these, `napi-set` accepts `id`,
    # `defer-hard-irqs` and `gro-flush-timeout` in its request.
229    name: napi
230    attributes:
231      -
232        name: ifindex
233        doc: ifindex of the netdevice to which NAPI instance belongs.
234        type: u32
235        checks:
236          min: 1
237      -
238        name: id
239        doc: ID of the NAPI instance.
240        type: u32
241      -
242        name: irq
243        doc: The associated interrupt vector number for the napi
244        type: u32
245      -
        # Optional attribute: per its doc it is absent when the NAPI is not
        # in threaded mode.
246        name: pid
247        doc: PID of the napi thread, if NAPI is configured to operate in
248             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
249             softirq context), the attribute will be absent.
250        type: u32
251      -
        # u32 on the wire, but capped at s32-max by `checks`.
252        name: defer-hard-irqs
253        doc: The number of consecutive empty polls before IRQ deferral ends
254             and hardware IRQs are re-enabled.
255        type: u32
256        checks:
257          max: s32-max
258      -
259        name: gro-flush-timeout
260        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
261             timer which schedules NAPI processing. Additionally, a non-zero
262             value will also prevent GRO from flushing recent super-frames at
263             the end of a NAPI cycle. This may add receive latency in exchange
264             for reducing the number of frames processed by the network stack.
265        type: uint
266  -
    # Attributes of a device queue; this is the attribute set of
    # `queue-get` and the parent of the `queue-id` subset.
267    name: queue
268    attributes:
269      -
270        name: id
271        doc: Queue index; most queue types are indexed like a C array, with
272             indexes starting at 0 and ending at queue count - 1. Queue indexes
273             are scoped to an interface and queue type.
274        type: u32
275      -
276        name: ifindex
277        doc: ifindex of the netdevice to which the queue belongs.
278        type: u32
279        checks:
280          min: 1
281      -
        # Values come from the `queue-type` enum (rx, tx).
282        name: type
283        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
284        type: u32
285        enum: queue-type
286      -
287        name: napi-id
288        doc: ID of the NAPI instance which services this queue.
289        type: u32
290      -
291        name: dmabuf
292        doc: ID of the dmabuf attached to this queue, if any.
293        type: u32
294
295  -
    # Attribute set of `qstats-get`. The first four attributes identify what
    # the stats refer to (`ifindex`, `queue-type`, `queue-id`) and how they
    # are iterated (`scope`); the counters follow, starting at attribute
    # id 8 (see `rx-packets`).
296    name: qstats
297    doc: |
298      Get device statistics, scoped to a device or a queue.
299      These statistics extend (and partially duplicate) statistics available
300      in struct rtnl_link_stats64.
301      Value of the `scope` attribute determines how statistics are
302      aggregated. When aggregated for the entire device the statistics
303      represent the total number of events since last explicit reset of
304      the device (i.e. not a reconfiguration like changing queue count).
305      When reported per-queue, however, the statistics may not add
306      up to the total number of events, will only be reported for currently
307      active objects, and will likely report the number of events since last
308      reconfiguration.
309    attributes:
310      -
311        name: ifindex
312        doc: ifindex of the netdevice to which stats belong.
313        type: u32
314        checks:
315          min: 1
316      -
        # Values come from the `queue-type` enum (rx, tx).
317        name: queue-type
318        doc: Queue type as rx, tx, for queue-id.
319        type: u32
320        enum: queue-type
321      -
322        name: queue-id
323        doc: Queue ID, if stats are scoped to a single queue instance.
324        type: u32
325      -
        # Values come from the `qstats-scope` flags.
326        name: scope
327        doc: |
328          What object type should be used to iterate over the stats.
329        type: uint
330        enum: qstats-scope
331      -
        # First counter; its explicit `value: 8` leaves a gap after the
        # identifying attributes above (see the inline comment).
332        name: rx-packets
333        doc: |
334          Number of wire packets successfully received and passed to the stack.
335          For drivers supporting XDP, XDP is considered the first layer
336          of the stack, so packets consumed by XDP are still counted here.
337        type: uint
338        value: 8 # reserve some attr ids in case we need more metadata later
339      -
340        name: rx-bytes
341        doc: Successfully received bytes, see `rx-packets`.
342        type: uint
343      -
344        name: tx-packets
345        doc: |
346          Number of wire packets successfully sent. Packet is considered to be
347          successfully sent once it is in device memory (usually this means
348          the device has issued a DMA completion for the packet).
349        type: uint
350      -
351        name: tx-bytes
352        doc: Successfully sent bytes, see `tx-packets`.
353        type: uint
354      -
355        name: rx-alloc-fail
356        doc: |
357          Number of times skb or buffer allocation failed on the Rx datapath.
358          Allocation failure may, or may not result in a packet drop, depending
359          on driver implementation and whether system recovers quickly.
360        type: uint
361      -
362        name: rx-hw-drops
363        doc: |
364          Number of all packets which entered the device, but never left it,
365          including but not limited to: packets dropped due to lack of buffer
366          space, processing errors, explicit or implicit policies and packet
367          filters.
368        type: uint
369      -
370        name: rx-hw-drop-overruns
371        doc: |
372          Number of packets dropped due to transient lack of resources, such as
373          buffer space, host descriptors etc.
374        type: uint
375      -
376        name: rx-csum-complete
377        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
378        type: uint
379      -
380        name: rx-csum-unnecessary
381        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
382        type: uint
383      -
384        name: rx-csum-none
385        doc: Number of packets that were not checksummed by device.
386        type: uint
387      -
388        name: rx-csum-bad
389        doc: |
390          Number of packets with bad checksum. The packets are not discarded,
391          but still delivered to the stack.
392        type: uint
393      -
394        name: rx-hw-gro-packets
395        doc: |
396          Number of packets that were coalesced from smaller packets by the device.
397          Counts only packets coalesced with the HW-GRO netdevice feature,
398          LRO-coalesced packets are not counted.
399        type: uint
400      -
401        name: rx-hw-gro-bytes
402        doc: See `rx-hw-gro-packets`.
403        type: uint
404      -
405        name: rx-hw-gro-wire-packets
406        doc: |
407          Number of packets that were coalesced to bigger packets with the HW-GRO
408          netdevice feature. LRO-coalesced packets are not counted.
409        type: uint
410      -
411        name: rx-hw-gro-wire-bytes
412        doc: See `rx-hw-gro-wire-packets`.
413        type: uint
414      -
415        name: rx-hw-drop-ratelimits
416        doc: |
417          Number of the packets dropped by the device due to the received
418          packets bitrate exceeding the device rate limit.
419        type: uint
420      -
421        name: tx-hw-drops
422        doc: |
423          Number of packets that arrived at the device but never left it,
424          encompassing packets dropped for reasons such as processing errors, as
425          well as those affected by explicitly defined policies and packet
426          filtering criteria.
427        type: uint
428      -
429        name: tx-hw-drop-errors
430        doc: Number of packets dropped because they were invalid or malformed.
431        type: uint
432      -
433        name: tx-csum-none
434        doc: |
435          Number of packets that did not require the device to calculate the
436          checksum.
437        type: uint
438      -
439        name: tx-needs-csum
440        doc: |
441          Number of packets that required the device to calculate the checksum.
442        type: uint
443      -
444        name: tx-hw-gso-packets
445        doc: |
446          Number of packets that necessitated segmentation into smaller packets
447          by the device.
448        type: uint
449      -
450        name: tx-hw-gso-bytes
451        doc: See `tx-hw-gso-packets`.
452        type: uint
453      -
454        name: tx-hw-gso-wire-packets
455        doc: |
456          Number of wire-sized packets generated by processing
457          `tx-hw-gso-packets`
458        type: uint
459      -
460        name: tx-hw-gso-wire-bytes
461        doc: See `tx-hw-gso-wire-packets`.
462        type: uint
463      -
464        name: tx-hw-drop-ratelimits
465        doc: |
466          Number of the packets dropped by the device due to the transmit
467          packets bitrate exceeding the device rate limit.
468        type: uint
469      -
470        name: tx-stop
471        doc: |
472          Number of times driver paused accepting new tx packets
473          from the stack to this queue, because the queue was full.
474          Note that if BQL is supported and enabled on the device
475          the networking stack will avoid queuing a lot of data at once.
476        type: uint
477      -
478        name: tx-wake
479        doc: |
480          Number of times driver re-started accepting send
481          requests to this queue from the stack.
482        type: uint
483  -
    # Subset of `queue` holding just the queue index and queue type; it is
    # the nested type of the `queues` attribute in the `dmabuf` set.
    # Attributes are listed by name only; per the netlink spec docs their
    # definitions come from the parent set.
484    name: queue-id
485    subset-of: queue
486    attributes:
487      -
488        name: id
489      -
490        name: type
491  -
    # Attributes for binding a dmabuf to receive queues of a netdev; this
    # is the attribute set of the `bind-rx` operation, whose request sends
    # `ifindex`, `fd` and `queues` and whose reply returns `id`.
492    name: dmabuf
493    attributes:
494      -
495        name: ifindex
496        doc: netdev ifindex to bind the dmabuf to.
497        type: u32
498        checks:
499          min: 1
500      -
        # Nest of `queue-id` attributes. Per the netlink spec docs,
        # multi-attr means the attribute may be repeated in a message,
        # here presumably once per queue to bind.
501        name: queues
502        doc: receive queues to bind the dmabuf to.
503        type: nest
504        nested-attributes: queue-id
505        multi-attr: true
506      -
507        name: fd
508        doc: dmabuf file descriptor to bind.
509        type: u32
510      -
511        name: id
512        doc: id of the dmabuf binding
513        type: u32
514        checks:
515          min: 1
516
517operations:
518  list:
519    -
      # Look up one netdev by `ifindex` (do) or list netdevs (dump). Both
      # replies share the attribute list anchored as `dev-all`.
520      name: dev-get
521      doc: Get / dump information about a netdev.
522      attribute-set: dev
523      do:
524        request:
525          attributes:
526            - ifindex
527        reply: &dev-all
528          attributes:
529            - ifindex
530            - xdp-features
531            - xdp-zc-max-segs
532            - xdp-rx-metadata-features
533            - xsk-features
534      dump:
535        reply: *dev-all
536    -
      # Notification on the `mgmt` multicast group. Per the netlink spec
      # docs, `notify: dev-get` means the message uses the dev-get reply
      # format.
537      name: dev-add-ntf
538      doc: Notification about device appearing.
539      notify: dev-get
540      mcgrp: mgmt
541    -
      # Notification on the `mgmt` multicast group. Per the netlink spec
      # docs, `notify: dev-get` means the message uses the dev-get reply
      # format.
542      name: dev-del-ntf
543      doc: Notification about device disappearing.
544      notify: dev-get
545      mcgrp: mgmt
546    -
      # Notification on the `mgmt` multicast group. Per the netlink spec
      # docs, `notify: dev-get` means the message uses the dev-get reply
      # format.
547      name: dev-change-ntf
548      doc: Notification about device configuration being changed.
549      notify: dev-get
550      mcgrp: mgmt
551    -
      # Look up one Page Pool by `id` (do) or list Page Pools (dump). Both
      # replies share the attribute list anchored as `pp-reply`.
552      name: page-pool-get
553      doc: |
554        Get / dump information about Page Pools.
555        (Only Page Pools associated with a net_device can be listed.)
556      attribute-set: page-pool
557      do:
558        request:
559          attributes:
560            - id
561        reply: &pp-reply
562          attributes:
563            - id
564            - ifindex
565            - napi-id
566            - inflight
567            - inflight-mem
568            - detach-time
569            - dmabuf
570      dump:
571        reply: *pp-reply
      # Per the genetlink spec schema, config-cond names the kernel config
      # option (without the CONFIG_ prefix) that gates this operation.
572      config-cond: page-pool
573    -
      # Notification on the `page-pool` multicast group, in the
      # page-pool-get reply format (`notify`); gated by the same
      # `config-cond` as page-pool-get.
574      name: page-pool-add-ntf
575      doc: Notification about page pool appearing.
576      notify: page-pool-get
577      mcgrp: page-pool
578      config-cond: page-pool
579    -
      # Notification on the `page-pool` multicast group, in the
      # page-pool-get reply format (`notify`); gated by the same
      # `config-cond` as page-pool-get.
580      name: page-pool-del-ntf
581      doc: Notification about page pool disappearing.
582      notify: page-pool-get
583      mcgrp: page-pool
584      config-cond: page-pool
585    -
      # Notification on the `page-pool` multicast group, in the
      # page-pool-get reply format (`notify`); gated by the same
      # `config-cond` as page-pool-get.
586      name: page-pool-change-ntf
587      doc: Notification about page pool configuration being changed.
588      notify: page-pool-get
589      mcgrp: page-pool
590      config-cond: page-pool
591    -
      # Fetch statistics for one Page Pool (do; the pool is identified by
      # the `info` nest) or for many (dump). Both replies share the
      # attribute list anchored as `pp-stats-reply`.
592      name: page-pool-stats-get
593      doc: Get page pool statistics.
594      attribute-set: page-pool-stats
595      do:
596        request:
597          attributes:
598            - info
599        reply: &pp-stats-reply
600          attributes:
601            - info
602            - alloc-fast
603            - alloc-slow
604            - alloc-slow-high-order
605            - alloc-empty
606            - alloc-refill
607            - alloc-waive
608            - recycle-cached
609            - recycle-cache-full
610            - recycle-ring
611            - recycle-ring-full
612            - recycle-released-refcnt
613      dump:
614        reply: *pp-stats-reply
      # Per the genetlink spec schema, config-cond names the kernel config
      # option (without the CONFIG_ prefix) that gates this operation. Note
      # it differs from the `page-pool` condition used by page-pool-get.
615      config-cond: page-pool-stats
616    -
      # Look up one queue (do; the request lists `ifindex`, `type` and
      # `id`) or list queues (dump; the request lists only `ifindex`).
      # Both replies share the attribute list anchored as `queue-get-op`.
617      name: queue-get
618      doc: Get queue information from the kernel.
619           Only configured queues will be reported (as opposed to all available
620           hardware queues).
621      attribute-set: queue
622      do:
623        request:
624          attributes:
625            - ifindex
626            - type
627            - id
628        reply: &queue-get-op
629          attributes:
630            - id
631            - type
632            - napi-id
633            - ifindex
634            - dmabuf
635      dump:
636        request:
637          attributes:
638            - ifindex
639        reply: *queue-get-op
640    -
      # Look up one NAPI instance by `id` (do) or list NAPI instances
      # (dump; the request lists only `ifindex`). Both replies share the
      # attribute list anchored as `napi-get-op`.
641      name: napi-get
642      doc: Get information about NAPI instances configured on the system.
643      attribute-set: napi
644      do:
645        request:
646          attributes:
647            - id
648        reply: &napi-get-op
649          attributes:
650            - id
651            - ifindex
652            - irq
653            - pid
654            - defer-hard-irqs
655            - gro-flush-timeout
656      dump:
657        request:
658          attributes:
659            - ifindex
660        reply: *napi-get-op
661    -
      # Dump-only operation (no `do` is declared). The request lists
      # `ifindex` and `scope`.
      # NOTE(review): the reply lists only four of the counters defined in
      # the `qstats` set; presumably the other counters can also appear in
      # replies - confirm against the kernel implementation.
662      name: qstats-get
663      doc: |
664        Get / dump fine grained statistics. Which statistics are reported
665        depends on the device and the driver, and whether the driver stores
666        software counters per-queue.
667      attribute-set: qstats
668      dump:
669        request:
670          attributes:
671            - ifindex
672            - scope
673        reply:
674          attributes:
675            - ifindex
676            - queue-type
677            - queue-id
678            - rx-packets
679            - rx-bytes
680            - tx-packets
681            - tx-bytes
682    -
      # Do-only operation: the request carries the target `ifindex`, the
      # dmabuf `fd` and the `queues` to bind; the reply returns the `id`
      # of the binding.
683      name: bind-rx
684      doc: Bind dmabuf to netdev
685      attribute-set: dmabuf
      # NOTE(review): admin-perm presumably restricts the operation to
      # privileged callers - confirm against the netlink spec docs.
686      flags: [ admin-perm ]
687      do:
688        request:
689          attributes:
690            - ifindex
691            - fd
692            - queues
693        reply:
694          attributes:
695            - id
696    -
      # Do-only operation with a request and no declared reply: `id`
      # selects the NAPI instance, the other two attributes are the
      # settings it accepts.
697      name: napi-set
698      doc: Set configurable NAPI instance settings.
699      attribute-set: napi
      # NOTE(review): admin-perm presumably restricts the operation to
      # privileged callers - confirm against the netlink spec docs.
700      flags: [ admin-perm ]
701      do:
702        request:
703          attributes:
704            - id
705            - defer-hard-irqs
706            - gro-flush-timeout
707
# Kernel-side code generation settings. Per the genetlink spec schema,
# `headers` lists extra headers for the generated kernel source and
# `sock-priv` names the type used for per-socket state.
708kernel-family:
709  headers: [ "linux/list.h" ]
710  sock-priv: struct list_head
711
# Multicast groups of this family, referenced by the `mcgrp` key of the
# notifications: `mgmt` by dev-*-ntf, `page-pool` by page-pool-*-ntf.
711mcast-groups:
712  list:
713    -
714      name: mgmt
715    -
716      name: page-pool
718