1// SPDX-License-Identifier: GPL-2.0-only 2// Copyright (C) 2022-2023 Isovalent, Inc. 3digraph { 4 node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes 5 graph [splines=ortho, nodesep=1] 6 7 subgraph cluster_key { 8 label = "Key\n(locks held during operation)"; 9 rankdir = TB; 10 11 remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"] 12 hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"] 13 lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"] 14 local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"] 15 no_lock [shape=rectangle,label="no locks held"] 16 } 17 18 begin [shape=oval,label="begin\nbpf_map_update()"] 19 20 // Nodes below with an 'fn_' prefix are roughly labeled by the C function 21 // names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c. 22 // Number suffixes and errno suffixes handle subsections of the corresponding 23 // logic in the function as of the writing of this dot. 24 // 25 // All LRU locks are rqspinlock_t. Every acquire can fail (AA self-deadlock 26 // or contention timeout); on failure the corresponding helper returns NULL 27 // and the caller propagates -ENOMEM. The "rqspinlock acquire failed" 28 // terminal below is reached via the dashed arrows from each acquire site. 29 30 rqspinlock_failed [shape=rectangle, 31 label="Any LRU rqspinlock\nacquire fails\n(AA or timeout)"] 32 33 // cf. __local_list_pop_free() / bpf_percpu_lru_pop_free() 34 local_freelist_check [shape=diamond,fillcolor=1, 35 label="Local freelist\nnode available?\n(lockless free_llist)"]; 36 use_local_node [shape=rectangle, 37 label="Use node owned\nby this CPU"] 38 39 // cf. bpf_lru_pop_free() 40 common_lru_check [shape=diamond, 41 label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"]; 42 43 fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2, 44 label="Flush local pending, 45 Rotate Global list, move 46 target_free 47 from global -> local"] 48 // Also corresponds to: 49 // fn__local_list_flush() 50 // fn_bpf_lru_list_rotate() 51 fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2, 52 label="Able to free\ntarget_free\nnodes?"] 53 54 fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3, 55 label="Shrink inactive list 56 up to remaining 57 target_free 58 (global LRU -> local)"] 59 fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2, 60 label="> 0 entries in\nlocal free list?"] 61 fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2, 62 label="Steal one node from 63 inactive, or if empty, 64 from active global list"] 65 fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3, 66 label="Try to remove\nnode from hashtab"] 67 68 local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"] 69 common_lru_check2 [shape=diamond, 70 label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"]; 71 72 subgraph cluster_remote_lock { 73 label = "Iterate through CPUs\n(start from current)"; 74 style = dashed; 75 rankdir=LR; 76 77 local_freelist_check5 [shape=diamond,fillcolor=4, 78 label="Steal a node from\nper-cpu freelist?"] 79 local_freelist_check6 [shape=rectangle,fillcolor=4, 80 label="Steal a node from 81 (1) Unreferenced pending, or 82 (2) Any pending node"] 83 local_freelist_check7 [shape=rectangle,fillcolor=3, 84 label="Try to remove\nnode from hashtab"] 85 fn_htab_lru_map_update_elem [shape=diamond, 86 label="Stole node\nfrom remote\nCPU?"] 87 fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"] 88 // Also corresponds to: 89 // use_local_node() 90 // fn__local_list_pop_pending() 91 } 92 93 // Post-steal: re-acquire local loc_l->lock to insert the stolen node into 94 // the local pending list. If the acquire fails, the stolen node is published 95 // to the lockless local free_llist so the next pop on this CPU picks it up 96 // instead of orphaning it. 97 post_steal_lock [shape=diamond,fillcolor=1, 98 label="Acquire local\nloc_l->lock\nto add pending"] 99 post_steal_to_free_llist [shape=rectangle, 100 label="Publish stolen node to\nlocal free_llist (lockless)"] 101 102 fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle, 103 label="Use node that was\nnot recently referenced"] 104 local_freelist_check4 [shape=rectangle, 105 label="Use node that was\nactively referenced\nin global list"] 106 fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"] 107 fn_htab_lru_map_update_elem3 [shape=rectangle, 108 label="Use node that was\nactively referenced\nin (another?) CPU's cache"] 109 fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3, 110 label="Update hashmap\nwith new element"] 111 fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"] 112 fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"] 113 fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"] 114 fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"] 115 116 begin -> local_freelist_check 117 // The initial per-CPU lock (loc_l->lock for common, l->lock for percpu) is 118 // acquired before the local freelist check; rqspinlock failure here exits 119 // directly to -ENOMEM (no recovery needed: nothing was removed yet). 120 local_freelist_check -> rqspinlock_failed [style=dashed, 121 xlabel="acquire fails"] 122 local_freelist_check -> use_local_node [xlabel="Y"] 123 local_freelist_check -> common_lru_check [xlabel="N"] 124 common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"] 125 common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"] 126 // Global lru_list lock acquire failure in pop_free_to_local: skip refill, 127 // fall through to the steal path. Not ENOMEM by itself. 128 fn_bpf_lru_list_pop_free_to_local -> common_lru_check2 [style=dashed, 129 xlabel="global lru_lock\nacquire fails"] 130 fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free 131 fn___bpf_lru_node_move_to_free -> 132 fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"] 133 fn___bpf_lru_node_move_to_free -> 134 fn___bpf_lru_list_shrink_inactive [xlabel="N"] 135 fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink 136 fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"] 137 fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"] 138 fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3 139 fn___bpf_lru_list_shrink3 -> local_freelist_check2 140 local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"] 141 local_freelist_check2 -> common_lru_check2 [xlabel = "N"] 142 common_lru_check2 -> local_freelist_check5 [xlabel = "Y"] 143 common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"] 144 local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"] 145 local_freelist_check5 -> local_freelist_check6 [xlabel = "N"] 146 local_freelist_check6 -> local_freelist_check7 147 local_freelist_check7 -> fn_htab_lru_map_update_elem 148 149 // Steal-loop victim lock failure is silent: treat as "no node found here" 150 // and continue to next CPU; same edge as the existing "N" path. 151 local_freelist_check5 -> fn_htab_lru_map_update_elem2 [style=dashed, 152 xlabel="victim's lock\nfails: skip"] 153 // After a successful steal, re-acquire the local loc_l->lock. On failure 154 // the stolen node is published to free_llist (recovered, not orphaned) 155 // and the update returns -ENOMEM. 156 fn_htab_lru_map_update_elem -> post_steal_lock [xlabel = "Y"] 157 post_steal_lock -> fn_htab_lru_map_update_elem3 [xlabel = "OK"] 158 post_steal_lock -> post_steal_to_free_llist [style=dashed, 159 xlabel="loc_l->lock\nacquire fails"] 160 post_steal_to_free_llist -> fn_htab_lru_map_update_elem_ENOMEM 161 fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2 [xlabel = "N"] 162 fn_htab_lru_map_update_elem2 -> 163 fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"] 164 fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"] 165 fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4 166 167 // Shared rqspinlock-failure terminal collapses to the same -ENOMEM exit. 168 rqspinlock_failed -> fn_htab_lru_map_update_elem_ENOMEM 169 170 use_local_node -> fn_htab_lru_map_update_elem4 171 fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4 172 local_freelist_check4 -> fn_htab_lru_map_update_elem4 173 174 fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"] 175 fn_htab_lru_map_update_elem4 -> 176 fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"] 177 fn_htab_lru_map_update_elem4 -> 178 fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"] 179 fn_htab_lru_map_update_elem4 -> 180 fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"] 181 182 // Create invisible pad nodes to line up various nodes 183 pad0 [style=invis] 184 pad1 [style=invis] 185 pad2 [style=invis] 186 pad3 [style=invis] 187 pad4 [style=invis] 188 189 // Line up the key with the top of the graph 190 no_lock -> local_lock [style=invis] 191 local_lock -> lru_lock [style=invis] 192 lru_lock -> hash_lock [style=invis] 193 hash_lock -> remote_lock [style=invis] 194 remote_lock -> local_freelist_check5 [style=invis] 195 remote_lock -> fn___bpf_lru_list_shrink [style=invis] 196 197 // Line up return code nodes at the bottom of the graph 198 fn_htab_lru_map_update_elem -> pad0 [style=invis] 199 pad0 -> pad1 [style=invis] 200 pad1 -> pad2 [style=invis] 201 //pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis] 202 fn_htab_lru_map_update_elem4 -> pad3 [style=invis] 203 pad3 -> fn_htab_lru_map_update_elem5 [style=invis] 204 pad3 -> fn_htab_lru_map_update_elem_EBUSY [style=invis] 205 pad3 -> fn_htab_lru_map_update_elem_EEXIST [style=invis] 206 pad3 -> fn_htab_lru_map_update_elem_ENOENT [style=invis] 207 208 // Reduce diagram width by forcing some nodes to appear above others 209 local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis] 210 common_lru_check2 -> pad4 [style=invis] 211 pad4 -> local_freelist_check5 [style=invis] 212} 213