// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>

#include "xsk.h"

static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
					       struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *node;

	node = bpf_map_kzalloc(&map->map, sizeof(*node),
			       GFP_ATOMIC | __GFP_NOWARN);
	if (!node)
		return ERR_PTR(-ENOMEM);

	bpf_map_inc(&map->map);

	node->map = map;
	node->map_entry = map_entry;
	return node;
}

static void xsk_map_node_free(struct xsk_map_node *node)
{
	bpf_map_put(&node->map->map);
	kfree(node);
}

static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
{
	spin_lock_bh(&xs->map_list_lock);
	list_add_tail(&node->node, &xs->map_list);
	spin_unlock_bh(&xs->map_list_lock);
}

static void xsk_map_sock_delete(struct xdp_sock *xs,
				struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *n, *tmp;

	spin_lock_bh(&xs->map_list_lock);
	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
		if (map_entry == n->map_entry) {
			list_del(&n->node);
			xsk_map_node_free(n);
		}
	}
	spin_unlock_bh(&xs->map_list_lock);
}

static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
	struct xsk_map *m;
	int numa_node;
	u64 size;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 ||
	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
		return ERR_PTR(-EINVAL);

	numa_node = bpf_map_attr_numa_node(attr);
	size = struct_size(m, xsk_map, attr->max_entries);

	m = bpf_map_area_alloc(size, numa_node);
	if (!m)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&m->map, attr);
	spin_lock_init(&m->lock);

	return &m->map;
}

static void xsk_map_free(struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	/* Wait for any in-flight XDP programs, which run under the RCU
	 * read-side (bh) critical section, to finish with the map before
	 * its memory is released.
	 */
	synchronize_net();
	bpf_map_area_free(m);
}

static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= m->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == m->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
	struct bpf_insn *insn = insn_buf;

	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
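/* The sequence above is what the verifier inlines into programs that call
 * bpf_map_lookup_elem() on an XSKMAP, avoiding a helper call. Roughly
 * equivalent C (an illustrative sketch, not code that is compiled here;
 * "key" is the u32 the program passed by pointer in BPF_REG_2):
 *
 *	return key < map->max_entries ? m->xsk_map[key] : NULL;
 */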
/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
 * by local_bh_disable() (from XDP calls inside NAPI). The
 * rcu_read_lock_bh_held() below makes lockdep accept both.
 */
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	if (key >= map->max_entries)
		return NULL;

	return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
}

static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __xsk_map_lookup_elem(map, *(u32 *)key);
}

static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *xs, *old_xs;
	u32 i = *(u32 *)key, fd = *(u32 *)value;
	struct xsk_map_node *node;
	struct socket *sock;
	int err;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= m->map.max_entries))
		return -E2BIG;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return err;

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return -EOPNOTSUPP;
	}

	xs = (struct xdp_sock *)sock->sk;

	map_entry = &m->xsk_map[i];
	node = xsk_map_node_alloc(m, map_entry);
	if (IS_ERR(node)) {
		sockfd_put(sock);
		return PTR_ERR(node);
	}

	spin_lock_bh(&m->lock);
	old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
	if (old_xs == xs) {
		err = 0;
		goto out;
	} else if (old_xs && map_flags == BPF_NOEXIST) {
		err = -EEXIST;
		goto out;
	} else if (!old_xs && map_flags == BPF_EXIST) {
		err = -ENOENT;
		goto out;
	}
	xsk_map_sock_add(xs, node);
	rcu_assign_pointer(*map_entry, xs);
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	return 0;

out:
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	xsk_map_node_free(node);
	return err;
}
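/* For context, a typical userspace control-plane sequence that populates an
 * entry looks like this (an illustrative sketch assuming libbpf; map_fd and
 * queue_id are hypothetical variables, not part of this file):
 *
 *	int xsk_fd = socket(AF_XDP, SOCK_RAW, 0);
 *	__u32 queue_id = 0;
 *
 *	(bind xsk_fd to a device and queue first, then:)
 *	bpf_map_update_elem(map_fd, &queue_id, &xsk_fd, BPF_ANY);
 */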
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *old_xs;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	spin_lock_bh(&m->lock);
	map_entry = &m->xsk_map[k];
	old_xs = unrcu_pointer(xchg(map_entry, NULL));
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);

	return 0;
}

static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
	return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
				      __xsk_map_lookup_elem);
}

/* Called from the XDP socket teardown path (see xsk.c) to clear this
 * socket's entry if the socket is still referenced by the map.
 */
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
			     struct xdp_sock __rcu **map_entry)
{
	spin_lock_bh(&map->lock);
	if (rcu_access_pointer(*map_entry) == xs) {
		rcu_assign_pointer(*map_entry, NULL);
		xsk_map_sock_delete(xs, map_entry);
	}
	spin_unlock_bh(&map->lock);
}

static bool xsk_map_meta_equal(const struct bpf_map *meta0,
			       const struct bpf_map *meta1)
{
	return meta0->max_entries == meta1->max_entries &&
		bpf_map_meta_equal(meta0, meta1);
}

static int xsk_map_btf_id;
const struct bpf_map_ops xsk_map_ops = {
	.map_meta_equal = xsk_map_meta_equal,
	.map_alloc = xsk_map_alloc,
	.map_free = xsk_map_free,
	.map_get_next_key = xsk_map_get_next_key,
	.map_lookup_elem = xsk_map_lookup_elem,
	.map_gen_lookup = xsk_map_gen_lookup,
	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
	.map_update_elem = xsk_map_update_elem,
	.map_delete_elem = xsk_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "xsk_map",
	.map_btf_id = &xsk_map_btf_id,
	.map_redirect = xsk_map_redirect,
};
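/* For context, an XDP program typically redirects packets into this map type
 * as follows (an illustrative libbpf-style sketch; xsks_map and
 * xdp_sock_prog are hypothetical names, not part of this file):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_XSKMAP);
 *		__uint(max_entries, 64);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} xsks_map SEC(".maps");
 *
 *	SEC("xdp")
 *	int xdp_sock_prog(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&xsks_map, ctx->rx_queue_index,
 *					XDP_PASS);
 *	}
 */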