xref: /linux/Documentation/bpf/map_lru_hash_update.dot (revision 68f4e480b089abae26fbab0c38c3df3cbac3d79d)
1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2022-2023 Isovalent, Inc.
3digraph {
4  node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
5  graph [splines=ortho, nodesep=1]
6
7  subgraph cluster_key {
8    label = "Key\n(locks held during operation)";
9    rankdir = TB;
10
11    remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
12    hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
13    lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
14    local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
15    no_lock [shape=rectangle,label="no locks held"]
16  }
17
18  begin [shape=oval,label="begin\nbpf_map_update()"]
19
20  // Nodes below with an 'fn_' prefix are roughly labeled by the C function
21  // names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
22  // Number suffixes and errno suffixes handle subsections of the corresponding
23  // logic in the function as of the writing of this dot.
24  //
25  // All LRU locks are rqspinlock_t. Every acquire can fail (AA self-deadlock
26  // or contention timeout); on failure the corresponding helper returns NULL
27  // and the caller propagates -ENOMEM. The "rqspinlock acquire failed"
28  // terminal below is reached via the dashed arrows from each acquire site.
29
30  rqspinlock_failed [shape=rectangle,
31    label="Any LRU rqspinlock\nacquire fails\n(AA or timeout)"]
32
33  // cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
34  local_freelist_check [shape=diamond,fillcolor=1,
35    label="Local freelist\nnode available?\n(lockless free_llist)"];
36  use_local_node [shape=rectangle,
37    label="Use node owned\nby this CPU"]
38
39  // cf. bpf_lru_pop_free()
40  common_lru_check [shape=diamond,
41    label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
42
43  fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
44    label="Flush local pending,
45    Rotate Global list, move
46    target_free
47    from global -> local"]
48  // Also corresponds to:
49  // fn__local_list_flush()
50  // fn_bpf_lru_list_rotate()
51  fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
52    label="Able to free\ntarget_free\nnodes?"]
53
54  fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
55    label="Shrink inactive list
56      up to remaining
57      target_free
58      (global LRU -> local)"]
59  fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
60    label="> 0 entries in\nlocal free list?"]
61  fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
62    label="Steal one node from
63      inactive, or if empty,
64      from active global list"]
65  fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
66    label="Try to remove\nnode from hashtab"]
67
68  local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
69  common_lru_check2 [shape=diamond,
70    label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
71
72  subgraph cluster_remote_lock {
73    label = "Iterate through CPUs\n(start from current)";
74    style = dashed;
75    rankdir=LR;
76
77    local_freelist_check5 [shape=diamond,fillcolor=4,
78      label="Steal a node from\nper-cpu freelist?"]
79    local_freelist_check6 [shape=rectangle,fillcolor=4,
80      label="Steal a node from
81        (1) Unreferenced pending, or
82        (2) Any pending node"]
83    local_freelist_check7 [shape=rectangle,fillcolor=3,
84      label="Try to remove\nnode from hashtab"]
85    fn_htab_lru_map_update_elem [shape=diamond,
86      label="Stole node\nfrom remote\nCPU?"]
87    fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
88    // Also corresponds to:
89    // use_local_node()
90    // fn__local_list_pop_pending()
91  }
92
93  // Post-steal: re-acquire local loc_l->lock to insert the stolen node into
94  // the local pending list. If the acquire fails, the stolen node is published
95  // to the lockless local free_llist so the next pop on this CPU picks it up
96  // instead of orphaning it.
97  post_steal_lock [shape=diamond,fillcolor=1,
98    label="Acquire local\nloc_l->lock\nto add pending"]
99  post_steal_to_free_llist [shape=rectangle,
100    label="Publish stolen node to\nlocal free_llist (lockless)"]
101
102  fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
103    label="Use node that was\nnot recently referenced"]
104  local_freelist_check4 [shape=rectangle,
105    label="Use node that was\nactively referenced\nin global list"]
106  fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
107  fn_htab_lru_map_update_elem3 [shape=rectangle,
108    label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
109  fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
110    label="Update hashmap\nwith new element"]
111  fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
112  fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
113  fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
114  fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
115
116  begin -> local_freelist_check
117  // The initial per-CPU lock (loc_l->lock for common, l->lock for percpu) is
118  // acquired before the local freelist check; rqspinlock failure here exits
119  // directly to -ENOMEM (no recovery needed: nothing was removed yet).
120  local_freelist_check -> rqspinlock_failed [style=dashed,
121    xlabel="acquire fails"]
122  local_freelist_check -> use_local_node [xlabel="Y"]
123  local_freelist_check -> common_lru_check [xlabel="N"]
124  common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
125  common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
126  // Global lru_list lock acquire failure in pop_free_to_local: skip refill,
127  // fall through to the steal path. Not ENOMEM by itself.
128  fn_bpf_lru_list_pop_free_to_local -> common_lru_check2 [style=dashed,
129    xlabel="global lru_lock\nacquire fails"]
130  fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
131  fn___bpf_lru_node_move_to_free ->
132    fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
133  fn___bpf_lru_node_move_to_free ->
134    fn___bpf_lru_list_shrink_inactive [xlabel="N"]
135  fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
136  fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
137  fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
138  fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
139  fn___bpf_lru_list_shrink3 -> local_freelist_check2
140  local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
141  local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
142  common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
143  common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
144  local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
145  local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
146  local_freelist_check6 -> local_freelist_check7
147  local_freelist_check7 -> fn_htab_lru_map_update_elem
148
149  // Steal-loop victim lock failure is silent: treat as "no node found here"
150  // and continue to next CPU; same edge as the existing "N" path.
151  local_freelist_check5 -> fn_htab_lru_map_update_elem2 [style=dashed,
152    xlabel="victim's lock\nfails: skip"]
153  // After a successful steal, re-acquire the local loc_l->lock. On failure
154  // the stolen node is published to free_llist (recovered, not orphaned)
155  // and the update returns -ENOMEM.
156  fn_htab_lru_map_update_elem -> post_steal_lock [xlabel = "Y"]
157  post_steal_lock -> fn_htab_lru_map_update_elem3 [xlabel = "OK"]
158  post_steal_lock -> post_steal_to_free_llist [style=dashed,
159    xlabel="loc_l->lock\nacquire fails"]
160  post_steal_to_free_llist -> fn_htab_lru_map_update_elem_ENOMEM
161  fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2  [xlabel = "N"]
162  fn_htab_lru_map_update_elem2 ->
163    fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
164  fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
165  fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
166
167  // Shared rqspinlock-failure terminal collapses to the same -ENOMEM exit.
168  rqspinlock_failed -> fn_htab_lru_map_update_elem_ENOMEM
169
170  use_local_node -> fn_htab_lru_map_update_elem4
171  fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
172  local_freelist_check4 -> fn_htab_lru_map_update_elem4
173
174  fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
175  fn_htab_lru_map_update_elem4 ->
176    fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
177  fn_htab_lru_map_update_elem4 ->
178    fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
179  fn_htab_lru_map_update_elem4 ->
180    fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
181
182  // Create invisible pad nodes to line up various nodes
183  pad0 [style=invis]
184  pad1 [style=invis]
185  pad2 [style=invis]
186  pad3 [style=invis]
187  pad4 [style=invis]
188
189  // Line up the key with the top of the graph
190  no_lock -> local_lock [style=invis]
191  local_lock -> lru_lock [style=invis]
192  lru_lock -> hash_lock [style=invis]
193  hash_lock -> remote_lock [style=invis]
194  remote_lock -> local_freelist_check5 [style=invis]
195  remote_lock -> fn___bpf_lru_list_shrink [style=invis]
196
197  // Line up return code nodes at the bottom of the graph
198  fn_htab_lru_map_update_elem -> pad0 [style=invis]
199  pad0 -> pad1 [style=invis]
200  pad1 -> pad2 [style=invis]
201  //pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
202  fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
203  pad3 -> fn_htab_lru_map_update_elem5  [style=invis]
204  pad3 -> fn_htab_lru_map_update_elem_EBUSY  [style=invis]
205  pad3 -> fn_htab_lru_map_update_elem_EEXIST  [style=invis]
206  pad3 -> fn_htab_lru_map_update_elem_ENOENT  [style=invis]
207
208  // Reduce diagram width by forcing some nodes to appear above others
209  local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
210  common_lru_check2 -> pad4 [style=invis]
211  pad4 -> local_freelist_check5 [style=invis]
212}
213