1 /* 2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Neither the names of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * Alternatively, this software may be distributed under the terms of the 17 * GNU General Public License ("GPL") version 2 as published by the Free 18 * Software Foundation. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <linux/module.h> 34 #include <linux/pid.h> 35 #include <linux/pid_namespace.h> 36 #include <linux/mutex.h> 37 #include <net/netlink.h> 38 #include <rdma/rdma_cm.h> 39 #include <rdma/rdma_netlink.h> 40 #include <rdma/frmr_pools.h> 41 42 #include "core_priv.h" 43 #include "cma_priv.h" 44 #include "restrack.h" 45 #include "uverbs.h" 46 #include "frmr_pools.h" 47 48 /* 49 * This determines whether a non-privileged user is allowed to specify a 50 * controlled QKEY or not, when true non-privileged user is allowed to specify 51 * a controlled QKEY. 52 */ 53 static bool privileged_qkey; 54 55 typedef int (*res_fill_func_t)(struct sk_buff*, bool, 56 struct rdma_restrack_entry*, uint32_t); 57 58 /* 59 * Sort array elements by the netlink attribute name 60 */ 61 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { 62 [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 }, 63 [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 }, 64 [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING, 65 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 66 [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING, 67 .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE }, 68 [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 }, 69 [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, 70 [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, 71 .len = IB_DEVICE_NAME_MAX }, 72 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, 73 [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, 74 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 75 [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, 76 [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, 77 [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, 78 [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, 79 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 80 [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, 81 [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, 82 [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, 83 [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, 84 [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, 85 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 86 [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, 87 [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, 88 .len = IFNAMSIZ }, 89 [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, 90 [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, 91 [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, 92 .len = IFNAMSIZ }, 93 [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, 94 [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, 95 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, 96 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, 97 [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, 98 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, 99 [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, 100 [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, 101 [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, 102 [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, 103 [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, 104 [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED }, 105 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, 106 [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED }, 107 [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { 108 .len = sizeof(struct __kernel_sockaddr_storage) }, 109 [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, 110 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, 111 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 112 [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, 113 [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, 114 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, 115 [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, 116 [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, 117 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, 118 [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, 119 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, 120 [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, 121 [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 }, 122 [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, 123 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, 124 [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, 125 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, 126 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, 127 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, 128 [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, 129 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, 130 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, 131 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, 132 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, 133 [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { 134 .len = sizeof(struct __kernel_sockaddr_storage) }, 135 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, 136 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, 137 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, 138 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 }, 139 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, 140 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 141 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, 142 [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING, 143 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 144 [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, 145 [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, 146 [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, 147 [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 }, 148 [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED }, 149 [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 }, 150 [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 }, 151 [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, 152 [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, 153 [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 }, 154 [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 }, 155 [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 }, 156 [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED }, 157 [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED }, 158 [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 }, 159 [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED }, 160 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED }, 161 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING }, 162 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 }, 163 [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, 164 [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, 165 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, 166 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, 167 [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, 168 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, 169 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, 170 [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 }, 171 [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 }, 172 [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 }, 173 [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING }, 174 [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, 175 [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, 176 [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 }, 177 [RDMA_NLDEV_ATTR_FRMR_POOLS] = { .type = NLA_NESTED }, 178 [RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY] = { .type = NLA_NESTED }, 179 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY] = { .type = NLA_NESTED }, 180 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS] = { .type = NLA_U8 }, 181 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 }, 182 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 }, 183 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 }, 184 [RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 }, 185 [RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE] = { .type = NLA_U64 }, 186 [RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE] = { .type = NLA_U64 }, 187 [RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 }, 188 [RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 }, 189 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 }, 190 }; 191 192 static int put_driver_name_print_type(struct sk_buff *msg, const char *name, 193 enum rdma_nldev_print_type print_type) 194 { 195 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name)) 196 return -EMSGSIZE; 197 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC && 198 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type)) 199 return -EMSGSIZE; 200 201 return 0; 202 } 203 204 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, 205 enum rdma_nldev_print_type print_type, 206 u32 value) 207 { 208 if (put_driver_name_print_type(msg, name, print_type)) 209 return -EMSGSIZE; 210 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value)) 211 return -EMSGSIZE; 212 213 return 0; 214 } 215 216 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, 217 enum rdma_nldev_print_type print_type, 218 u64 value) 219 { 220 if (put_driver_name_print_type(msg, name, print_type)) 221 return -EMSGSIZE; 222 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value, 223 RDMA_NLDEV_ATTR_PAD)) 224 return -EMSGSIZE; 225 226 return 0; 227 } 228 229 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, 230 const char *str) 231 { 232 if (put_driver_name_print_type(msg, name, 233 RDMA_NLDEV_PRINT_TYPE_UNSPEC)) 234 return -EMSGSIZE; 235 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) 236 return -EMSGSIZE; 237 238 return 0; 239 } 240 EXPORT_SYMBOL(rdma_nl_put_driver_string); 241 242 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) 243 { 244 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 245 value); 246 } 247 EXPORT_SYMBOL(rdma_nl_put_driver_u32); 248 249 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, 250 u32 value) 251 { 252 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 253 value); 254 } 255 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex); 256 257 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value) 258 { 259 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 260 value); 261 } 262 EXPORT_SYMBOL(rdma_nl_put_driver_u64); 263 264 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value) 265 { 266 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 267 value); 268 } 269 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex); 270 271 bool rdma_nl_get_privileged_qkey(void) 272 { 273 return privileged_qkey; 274 } 275 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey); 276 277 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) 278 { 279 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 280 return -EMSGSIZE; 281 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 282 dev_name(&device->dev))) 283 return -EMSGSIZE; 284 285 return 0; 286 } 287 288 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) 289 { 290 char fw[IB_FW_VERSION_NAME_MAX]; 291 int ret = 0; 292 u32 port; 293 294 if (fill_nldev_handle(msg, device)) 295 return -EMSGSIZE; 296 297 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) 298 return -EMSGSIZE; 299 300 BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); 301 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 302 device->attrs.device_cap_flags, 303 RDMA_NLDEV_ATTR_PAD)) 304 return -EMSGSIZE; 305 306 ib_get_device_fw_str(device, fw); 307 /* Device without FW has strlen(fw) = 0 */ 308 if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) 309 return -EMSGSIZE; 310 311 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, 312 be64_to_cpu(device->node_guid), 313 RDMA_NLDEV_ATTR_PAD)) 314 return -EMSGSIZE; 315 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, 316 be64_to_cpu(device->attrs.sys_image_guid), 317 RDMA_NLDEV_ATTR_PAD)) 318 return -EMSGSIZE; 319 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) 320 return -EMSGSIZE; 321 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim)) 322 return -EMSGSIZE; 323 324 if (device->type && 325 nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type)) 326 return -EMSGSIZE; 327 328 if (device->parent && 329 nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME, 330 dev_name(&device->parent->dev))) 331 return -EMSGSIZE; 332 333 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, 334 device->name_assign_type)) 335 return -EMSGSIZE; 336 337 /* 338 * Link type is determined on first port and mlx4 device 339 * which can potentially have two different link type for the same 340 * IB device is considered as better to be avoided in the future, 341 */ 342 port = rdma_start_port(device); 343 if (rdma_cap_opa_mad(device, port)) 344 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa"); 345 else if (rdma_protocol_ib(device, port)) 346 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib"); 347 else if (rdma_protocol_iwarp(device, port)) 348 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw"); 349 else if (rdma_protocol_roce(device, port)) 350 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce"); 351 else if (rdma_protocol_usnic(device, port)) 352 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, 353 "usnic"); 354 return ret; 355 } 356 357 static int fill_port_info(struct sk_buff *msg, 358 struct ib_device *device, u32 port, 359 const struct net *net) 360 { 361 struct net_device *netdev = NULL; 362 struct ib_port_attr attr; 363 int ret; 364 u64 cap_flags = 0; 365 366 if (fill_nldev_handle(msg, device)) 367 return -EMSGSIZE; 368 369 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 370 return -EMSGSIZE; 371 372 ret = ib_query_port(device, port, &attr); 373 if (ret) 374 return ret; 375 376 if (rdma_protocol_ib(device, port)) { 377 BUILD_BUG_ON((sizeof(attr.port_cap_flags) + 378 sizeof(attr.port_cap_flags2)) > sizeof(u64)); 379 cap_flags = attr.port_cap_flags | 380 ((u64)attr.port_cap_flags2 << 32); 381 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 382 cap_flags, RDMA_NLDEV_ATTR_PAD)) 383 return -EMSGSIZE; 384 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, 385 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) 386 return -EMSGSIZE; 387 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) 388 return -EMSGSIZE; 389 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) 390 return -EMSGSIZE; 391 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) 392 return -EMSGSIZE; 393 } 394 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) 395 return -EMSGSIZE; 396 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) 397 return -EMSGSIZE; 398 399 netdev = ib_device_get_netdev(device, port); 400 if (netdev && net_eq(dev_net(netdev), net)) { 401 ret = nla_put_u32(msg, 402 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 403 if (ret) 404 goto out; 405 ret = nla_put_string(msg, 406 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 407 } 408 409 out: 410 dev_put(netdev); 411 return ret; 412 } 413 414 static int fill_res_info_entry(struct sk_buff *msg, 415 const char *name, u64 curr) 416 { 417 struct nlattr *entry_attr; 418 419 entry_attr = nla_nest_start_noflag(msg, 420 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); 421 if (!entry_attr) 422 return -EMSGSIZE; 423 424 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name)) 425 goto err; 426 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 427 RDMA_NLDEV_ATTR_PAD)) 428 goto err; 429 430 nla_nest_end(msg, entry_attr); 431 return 0; 432 433 err: 434 nla_nest_cancel(msg, entry_attr); 435 return -EMSGSIZE; 436 } 437 438 static int fill_res_info(struct sk_buff *msg, struct ib_device *device, 439 bool show_details) 440 { 441 static const char * const names[RDMA_RESTRACK_MAX] = { 442 [RDMA_RESTRACK_PD] = "pd", 443 [RDMA_RESTRACK_CQ] = "cq", 444 [RDMA_RESTRACK_QP] = "qp", 445 [RDMA_RESTRACK_CM_ID] = "cm_id", 446 [RDMA_RESTRACK_MR] = "mr", 447 [RDMA_RESTRACK_CTX] = "ctx", 448 [RDMA_RESTRACK_SRQ] = "srq", 449 }; 450 451 struct nlattr *table_attr; 452 int ret, i, curr; 453 454 if (fill_nldev_handle(msg, device)) 455 return -EMSGSIZE; 456 457 table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); 458 if (!table_attr) 459 return -EMSGSIZE; 460 461 for (i = 0; i < RDMA_RESTRACK_MAX; i++) { 462 if (!names[i]) 463 continue; 464 curr = rdma_restrack_count(device, i, show_details); 465 ret = fill_res_info_entry(msg, names[i], curr); 466 if (ret) 467 goto err; 468 } 469 470 nla_nest_end(msg, table_attr); 471 return 0; 472 473 err: 474 nla_nest_cancel(msg, table_attr); 475 return ret; 476 } 477 478 static int fill_res_name_pid(struct sk_buff *msg, 479 struct rdma_restrack_entry *res) 480 { 481 int err = 0; 482 483 /* 484 * For user resources, user is should read /proc/PID/comm to get the 485 * name of the task file. 486 */ 487 if (rdma_is_kernel_res(res)) { 488 err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, 489 res->kern_name); 490 } else { 491 pid_t pid; 492 493 pid = task_pid_vnr(res->task); 494 /* 495 * Task is dead and in zombie state. 496 * There is no need to print PID anymore. 497 */ 498 if (pid) 499 /* 500 * This part is racy, task can be killed and PID will 501 * be zero right here but it is ok, next query won't 502 * return PID. We don't promise real-time reflection 503 * of SW objects. 504 */ 505 err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); 506 } 507 508 return err ? -EMSGSIZE : 0; 509 } 510 511 static int fill_res_qp_entry_query(struct sk_buff *msg, 512 struct rdma_restrack_entry *res, 513 struct ib_device *dev, 514 struct ib_qp *qp) 515 { 516 struct ib_qp_init_attr qp_init_attr; 517 struct ib_qp_attr qp_attr; 518 int ret; 519 520 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr); 521 if (ret) 522 return ret; 523 524 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { 525 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, 526 qp_attr.dest_qp_num)) 527 goto err; 528 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN, 529 qp_attr.rq_psn)) 530 goto err; 531 } 532 533 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn)) 534 goto err; 535 536 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC || 537 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) { 538 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, 539 qp_attr.path_mig_state)) 540 goto err; 541 } 542 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type)) 543 goto err; 544 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) 545 goto err; 546 547 if (dev->ops.fill_res_qp_entry) 548 return dev->ops.fill_res_qp_entry(msg, qp); 549 return 0; 550 551 err: return -EMSGSIZE; 552 } 553 554 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, 555 struct rdma_restrack_entry *res, uint32_t port) 556 { 557 struct ib_qp *qp = container_of(res, struct ib_qp, res); 558 struct ib_device *dev = qp->device; 559 int ret; 560 561 if (port && port != qp->port) 562 return -EAGAIN; 563 564 /* In create_qp() port is not set yet */ 565 if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) 566 return -EMSGSIZE; 567 568 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); 569 if (ret) 570 return -EMSGSIZE; 571 572 if (!rdma_is_kernel_res(res) && 573 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) 574 return -EMSGSIZE; 575 576 ret = fill_res_name_pid(msg, res); 577 if (ret) 578 return -EMSGSIZE; 579 580 return fill_res_qp_entry_query(msg, res, dev, qp); 581 } 582 583 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 584 struct rdma_restrack_entry *res, uint32_t port) 585 { 586 struct ib_qp *qp = container_of(res, struct ib_qp, res); 587 struct ib_device *dev = qp->device; 588 589 if (port && port != qp->port) 590 return -EAGAIN; 591 if (!dev->ops.fill_res_qp_entry_raw) 592 return -EINVAL; 593 return dev->ops.fill_res_qp_entry_raw(msg, qp); 594 } 595 596 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, 597 struct rdma_restrack_entry *res, uint32_t port) 598 { 599 struct rdma_id_private *id_priv = 600 container_of(res, struct rdma_id_private, res); 601 struct ib_device *dev = id_priv->id.device; 602 struct rdma_cm_id *cm_id = &id_priv->id; 603 604 if (port && port != cm_id->port_num) 605 return -EAGAIN; 606 607 if (cm_id->port_num && 608 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) 609 goto err; 610 611 if (id_priv->qp_num) { 612 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) 613 goto err; 614 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) 615 goto err; 616 } 617 618 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) 619 goto err; 620 621 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) 622 goto err; 623 624 if (cm_id->route.addr.src_addr.ss_family && 625 nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, 626 sizeof(cm_id->route.addr.src_addr), 627 &cm_id->route.addr.src_addr)) 628 goto err; 629 if (cm_id->route.addr.dst_addr.ss_family && 630 nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, 631 sizeof(cm_id->route.addr.dst_addr), 632 &cm_id->route.addr.dst_addr)) 633 goto err; 634 635 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id)) 636 goto err; 637 638 if (fill_res_name_pid(msg, res)) 639 goto err; 640 641 if (dev->ops.fill_res_cm_id_entry) 642 return dev->ops.fill_res_cm_id_entry(msg, cm_id); 643 return 0; 644 645 err: return -EMSGSIZE; 646 } 647 648 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, 649 struct rdma_restrack_entry *res, uint32_t port) 650 { 651 struct ib_cq *cq = container_of(res, struct ib_cq, res); 652 struct ib_device *dev = cq->device; 653 654 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) 655 return -EMSGSIZE; 656 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 657 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) 658 return -EMSGSIZE; 659 660 /* Poll context is only valid for kernel CQs */ 661 if (rdma_is_kernel_res(res) && 662 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) 663 return -EMSGSIZE; 664 665 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) 666 return -EMSGSIZE; 667 668 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) 669 return -EMSGSIZE; 670 if (!rdma_is_kernel_res(res) && 671 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 672 cq->uobject->uevent.uobject.context->res.id)) 673 return -EMSGSIZE; 674 675 if (fill_res_name_pid(msg, res)) 676 return -EMSGSIZE; 677 678 return (dev->ops.fill_res_cq_entry) ? 679 dev->ops.fill_res_cq_entry(msg, cq) : 0; 680 } 681 682 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 683 struct rdma_restrack_entry *res, uint32_t port) 684 { 685 struct ib_cq *cq = container_of(res, struct ib_cq, res); 686 struct ib_device *dev = cq->device; 687 688 if (!dev->ops.fill_res_cq_entry_raw) 689 return -EINVAL; 690 return dev->ops.fill_res_cq_entry_raw(msg, cq); 691 } 692 693 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 694 struct rdma_restrack_entry *res, uint32_t port) 695 { 696 struct ib_mr *mr = container_of(res, struct ib_mr, res); 697 struct ib_device *dev = mr->pd->device; 698 699 if (has_cap_net_admin) { 700 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) 701 return -EMSGSIZE; 702 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) 703 return -EMSGSIZE; 704 } 705 706 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 707 RDMA_NLDEV_ATTR_PAD)) 708 return -EMSGSIZE; 709 710 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 711 return -EMSGSIZE; 712 713 if (!rdma_is_kernel_res(res) && 714 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) 715 return -EMSGSIZE; 716 717 if (fill_res_name_pid(msg, res)) 718 return -EMSGSIZE; 719 720 return (dev->ops.fill_res_mr_entry) ? 721 dev->ops.fill_res_mr_entry(msg, mr) : 722 0; 723 } 724 725 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 726 struct rdma_restrack_entry *res, uint32_t port) 727 { 728 struct ib_mr *mr = container_of(res, struct ib_mr, res); 729 struct ib_device *dev = mr->pd->device; 730 731 if (!dev->ops.fill_res_mr_entry_raw) 732 return -EINVAL; 733 return dev->ops.fill_res_mr_entry_raw(msg, mr); 734 } 735 736 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, 737 struct rdma_restrack_entry *res, uint32_t port) 738 { 739 struct ib_pd *pd = container_of(res, struct ib_pd, res); 740 741 if (has_cap_net_admin) { 742 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, 743 pd->local_dma_lkey)) 744 goto err; 745 if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && 746 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, 747 pd->unsafe_global_rkey)) 748 goto err; 749 } 750 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 751 atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) 752 goto err; 753 754 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id)) 755 goto err; 756 757 if (!rdma_is_kernel_res(res) && 758 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 759 pd->uobject->context->res.id)) 760 goto err; 761 762 return fill_res_name_pid(msg, res); 763 764 err: return -EMSGSIZE; 765 } 766 767 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin, 768 struct rdma_restrack_entry *res, uint32_t port) 769 { 770 struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res); 771 772 if (rdma_is_kernel_res(res)) 773 return 0; 774 775 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id)) 776 return -EMSGSIZE; 777 778 return fill_res_name_pid(msg, res); 779 } 780 781 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range, 782 uint32_t max_range) 783 { 784 struct nlattr *entry_attr; 785 786 if (!min_range) 787 return 0; 788 789 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); 790 if (!entry_attr) 791 return -EMSGSIZE; 792 793 if (min_range == max_range) { 794 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range)) 795 goto err; 796 } else { 797 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range)) 798 goto err; 799 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range)) 800 goto err; 801 } 802 nla_nest_end(msg, entry_attr); 803 return 0; 804 805 err: 806 nla_nest_cancel(msg, entry_attr); 807 return -EMSGSIZE; 808 } 809 810 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq) 811 { 812 uint32_t min_range = 0, prev = 0; 813 struct rdma_restrack_entry *res; 814 struct rdma_restrack_root *rt; 815 struct nlattr *table_attr; 816 struct ib_qp *qp = NULL; 817 unsigned long id = 0; 818 819 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 820 if (!table_attr) 821 return -EMSGSIZE; 822 823 rt = &srq->device->res[RDMA_RESTRACK_QP]; 824 xa_lock(&rt->xa); 825 xa_for_each(&rt->xa, id, res) { 826 if (!rdma_restrack_get(res)) 827 continue; 828 829 qp = container_of(res, struct ib_qp, res); 830 if (!qp->srq || (qp->srq->res.id != srq->res.id)) { 831 rdma_restrack_put(res); 832 continue; 833 } 834 835 if (qp->qp_num < prev) 836 /* qp_num should be ascending */ 837 goto err_loop; 838 839 if (min_range == 0) { 840 min_range = qp->qp_num; 841 } else if (qp->qp_num > (prev + 1)) { 842 if (fill_res_range_qp_entry(msg, min_range, prev)) 843 goto err_loop; 844 845 min_range = qp->qp_num; 846 } 847 prev = qp->qp_num; 848 rdma_restrack_put(res); 849 } 850 851 xa_unlock(&rt->xa); 852 853 if (fill_res_range_qp_entry(msg, min_range, prev)) 854 goto err; 855 856 nla_nest_end(msg, table_attr); 857 return 0; 858 859 err_loop: 860 rdma_restrack_put(res); 861 xa_unlock(&rt->xa); 862 err: 863 nla_nest_cancel(msg, table_attr); 864 return -EMSGSIZE; 865 } 866 867 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin, 868 struct rdma_restrack_entry *res, uint32_t port) 869 { 870 struct ib_srq *srq = container_of(res, struct ib_srq, res); 871 struct ib_device *dev = srq->device; 872 873 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id)) 874 goto err; 875 876 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type)) 877 goto err; 878 879 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id)) 880 goto err; 881 882 if (ib_srq_has_cq(srq->srq_type)) { 883 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, 884 srq->ext.cq->res.id)) 885 goto err; 886 } 887 888 if (fill_res_srq_qps(msg, srq)) 889 goto err; 890 891 if (fill_res_name_pid(msg, res)) 892 goto err; 893 894 if (dev->ops.fill_res_srq_entry) 895 return dev->ops.fill_res_srq_entry(msg, srq); 896 897 return 0; 898 899 err: 900 return -EMSGSIZE; 901 } 902 903 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 904 struct rdma_restrack_entry *res, uint32_t port) 905 { 906 struct ib_srq *srq = container_of(res, struct ib_srq, res); 907 struct ib_device *dev = srq->device; 908 909 if (!dev->ops.fill_res_srq_entry_raw) 910 return -EINVAL; 911 return dev->ops.fill_res_srq_entry_raw(msg, srq); 912 } 913 914 static int fill_stat_counter_mode(struct sk_buff *msg, 915 struct rdma_counter *counter) 916 { 917 struct rdma_counter_mode *m = &counter->mode; 918 919 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) 920 return -EMSGSIZE; 921 922 if (m->mode == RDMA_COUNTER_MODE_AUTO) { 923 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && 924 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) 925 return -EMSGSIZE; 926 927 if ((m->mask & RDMA_COUNTER_MASK_PID) && 928 fill_res_name_pid(msg, &counter->res)) 929 return -EMSGSIZE; 930 } 931 932 return 0; 933 } 934 935 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn) 936 { 937 struct nlattr *entry_attr; 938 939 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); 940 if (!entry_attr) 941 return -EMSGSIZE; 942 943 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) 944 goto err; 945 946 nla_nest_end(msg, entry_attr); 947 return 0; 948 949 err: 950 nla_nest_cancel(msg, entry_attr); 951 return -EMSGSIZE; 952 } 953 954 static int fill_stat_counter_qps(struct sk_buff *msg, 955 struct rdma_counter *counter) 956 { 957 struct rdma_restrack_entry *res; 958 struct rdma_restrack_root *rt; 959 struct nlattr *table_attr; 960 struct ib_qp *qp = NULL; 961 unsigned long id = 0; 962 int ret = 0; 963 964 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 965 if (!table_attr) 966 return -EMSGSIZE; 967 968 rt = &counter->device->res[RDMA_RESTRACK_QP]; 969 xa_lock(&rt->xa); 970 xa_for_each(&rt->xa, id, res) { 971 qp = container_of(res, struct ib_qp, res); 972 if (!qp->counter || (qp->counter->id != counter->id)) 973 continue; 974 975 ret = fill_stat_counter_qp_entry(msg, qp->qp_num); 976 if (ret) 977 goto err; 978 } 979 980 xa_unlock(&rt->xa); 981 nla_nest_end(msg, table_attr); 982 return 0; 983 984 err: 985 xa_unlock(&rt->xa); 986 nla_nest_cancel(msg, table_attr); 987 return ret; 988 } 989 990 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, 991 u64 value) 992 { 993 struct nlattr *entry_attr; 994 995 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 996 if (!entry_attr) 997 return -EMSGSIZE; 998 999 if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 1000 name)) 1001 goto err; 1002 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, 1003 value, RDMA_NLDEV_ATTR_PAD)) 1004 goto err; 1005 1006 nla_nest_end(msg, entry_attr); 1007 return 0; 1008 1009 err: 1010 nla_nest_cancel(msg, entry_attr); 1011 return -EMSGSIZE; 1012 } 1013 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); 1014 1015 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 1016 struct rdma_restrack_entry *res, uint32_t port) 1017 { 1018 struct ib_mr *mr = container_of(res, struct ib_mr, res); 1019 struct ib_device *dev = mr->pd->device; 1020 1021 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 1022 goto err; 1023 1024 if (dev->ops.fill_stat_mr_entry) 1025 return dev->ops.fill_stat_mr_entry(msg, mr); 1026 return 0; 1027 1028 err: 1029 return -EMSGSIZE; 1030 } 1031 1032 static int fill_stat_counter_hwcounters(struct sk_buff *msg, 1033 struct rdma_counter *counter) 1034 { 1035 struct rdma_hw_stats *st = counter->stats; 1036 struct nlattr *table_attr; 1037 int i; 1038 1039 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 1040 if (!table_attr) 1041 return -EMSGSIZE; 1042 1043 mutex_lock(&st->lock); 1044 for (i = 0; i < st->num_counters; i++) { 1045 if (test_bit(i, st->is_disabled)) 1046 continue; 1047 if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, 1048 st->value[i])) 1049 goto err; 1050 } 1051 mutex_unlock(&st->lock); 1052 1053 nla_nest_end(msg, table_attr); 1054 return 0; 1055 1056 err: 1057 mutex_unlock(&st->lock); 1058 nla_nest_cancel(msg, table_attr); 1059 return -EMSGSIZE; 1060 } 1061 1062 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, 1063 struct rdma_restrack_entry *res, 1064 uint32_t port) 1065 { 1066 struct rdma_counter *counter = 1067 container_of(res, struct rdma_counter, res); 1068 1069 if (port && port != counter->port) 1070 return -EAGAIN; 1071 1072 /* Dump it even query failed */ 1073 rdma_counter_query_stats(counter); 1074 1075 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || 1076 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || 1077 fill_stat_counter_mode(msg, counter) || 1078 fill_stat_counter_qps(msg, counter) || 1079 fill_stat_counter_hwcounters(msg, counter)) 1080 return -EMSGSIZE; 1081 1082 return 0; 1083 } 1084 1085 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1086 struct netlink_ext_ack *extack) 1087 { 1088 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1089 struct ib_device *device; 1090 struct sk_buff *msg; 1091 u32 index; 1092 int err; 1093 1094 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1095 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1096 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1097 return -EINVAL; 1098 1099 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1100 1101 device = ib_device_get_by_index(sock_net(skb->sk), index); 1102 if (!device) 1103 return -EINVAL; 1104 1105 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1106 if (!msg) { 1107 err = -ENOMEM; 1108 goto err; 1109 } 1110 1111 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1112 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1113 0, 0); 1114 if (!nlh) { 1115 err = -EMSGSIZE; 1116 goto err_free; 1117 } 1118 1119 err = fill_dev_info(msg, device); 1120 if (err) 1121 goto err_free; 1122 1123 nlmsg_end(msg, nlh); 1124 1125 ib_device_put(device); 1126 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1127 1128 err_free: 1129 nlmsg_free(msg); 1130 err: 1131 ib_device_put(device); 1132 return err; 1133 } 1134 1135 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1136 struct netlink_ext_ack *extack) 1137 { 1138 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1139 struct ib_device *device; 1140 u32 index; 1141 int err; 1142 1143 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1144 nldev_policy, extack); 1145 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1146 return -EINVAL; 1147 1148 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1149 device = ib_device_get_by_index(sock_net(skb->sk), index); 1150 if (!device) 1151 return -EINVAL; 1152 1153 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { 1154 char name[IB_DEVICE_NAME_MAX] = {}; 1155 1156 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1157 IB_DEVICE_NAME_MAX); 1158 if (strlen(name) == 0) { 1159 err = -EINVAL; 1160 goto done; 1161 } 1162 err = ib_device_rename(device, name); 1163 goto done; 1164 } 1165 1166 if (tb[RDMA_NLDEV_NET_NS_FD]) { 1167 u32 ns_fd; 1168 1169 ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); 1170 err = ib_device_set_netns_put(skb, device, ns_fd); 1171 goto put_done; 1172 } 1173 1174 if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) { 1175 u8 use_dim; 1176 1177 use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); 1178 err = ib_device_set_dim(device, use_dim); 1179 goto done; 1180 } 1181 1182 done: 1183 ib_device_put(device); 1184 put_done: 1185 return err; 1186 } 1187 1188 static int _nldev_get_dumpit(struct ib_device *device, 1189 struct sk_buff *skb, 1190 struct netlink_callback *cb, 1191 unsigned int idx) 1192 { 1193 int start = cb->args[0]; 1194 struct nlmsghdr *nlh; 1195 1196 if (idx < start) 1197 return 0; 1198 1199 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1200 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1201 0, NLM_F_MULTI); 1202 1203 if (!nlh || fill_dev_info(skb, device)) { 1204 nlmsg_cancel(skb, nlh); 1205 goto out; 1206 } 1207 1208 nlmsg_end(skb, nlh); 1209 1210 idx++; 1211 1212 out: cb->args[0] = idx; 1213 return skb->len; 1214 } 1215 1216 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 1217 { 1218 /* 1219 * There is no need to take lock, because 1220 * we are relying on ib_core's locking. 1221 */ 1222 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); 1223 } 1224 1225 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1226 struct netlink_ext_ack *extack) 1227 { 1228 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1229 struct ib_device *device; 1230 struct sk_buff *msg; 1231 u32 index; 1232 u32 port; 1233 int err; 1234 1235 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1236 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1237 if (err || 1238 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 1239 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 1240 return -EINVAL; 1241 1242 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1243 device = ib_device_get_by_index(sock_net(skb->sk), index); 1244 if (!device) 1245 return -EINVAL; 1246 1247 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1248 if (!rdma_is_port_valid(device, port)) { 1249 err = -EINVAL; 1250 goto err; 1251 } 1252 1253 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1254 if (!msg) { 1255 err = -ENOMEM; 1256 goto err; 1257 } 1258 1259 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1260 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1261 0, 0); 1262 if (!nlh) { 1263 err = -EMSGSIZE; 1264 goto err_free; 1265 } 1266 1267 err = fill_port_info(msg, device, port, sock_net(skb->sk)); 1268 if (err) 1269 goto err_free; 1270 1271 nlmsg_end(msg, nlh); 1272 ib_device_put(device); 1273 1274 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1275 1276 err_free: 1277 nlmsg_free(msg); 1278 err: 1279 ib_device_put(device); 1280 return err; 1281 } 1282 1283 static int nldev_port_get_dumpit(struct sk_buff *skb, 1284 struct netlink_callback *cb) 1285 { 1286 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1287 struct ib_device *device; 1288 int start = cb->args[0]; 1289 struct nlmsghdr *nlh; 1290 u32 idx = 0; 1291 u32 ifindex; 1292 int err; 1293 unsigned int p; 1294 1295 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1296 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1297 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1298 return -EINVAL; 1299 1300 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1301 device = ib_device_get_by_index(sock_net(skb->sk), ifindex); 1302 if (!device) 1303 return -EINVAL; 1304 1305 rdma_for_each_port (device, p) { 1306 /* 1307 * The dumpit function returns all information from specific 1308 * index. This specific index is taken from the netlink 1309 * messages request sent by user and it is available 1310 * in cb->args[0]. 1311 * 1312 * Usually, the user doesn't fill this field and it causes 1313 * to return everything. 1314 * 1315 */ 1316 if (idx < start) { 1317 idx++; 1318 continue; 1319 } 1320 1321 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 1322 cb->nlh->nlmsg_seq, 1323 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1324 RDMA_NLDEV_CMD_PORT_GET), 1325 0, NLM_F_MULTI); 1326 1327 if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { 1328 nlmsg_cancel(skb, nlh); 1329 goto out; 1330 } 1331 idx++; 1332 nlmsg_end(skb, nlh); 1333 } 1334 1335 out: 1336 ib_device_put(device); 1337 cb->args[0] = idx; 1338 return skb->len; 1339 } 1340 1341 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1342 struct netlink_ext_ack *extack) 1343 { 1344 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1345 bool show_details = false; 1346 struct ib_device *device; 1347 struct sk_buff *msg; 1348 u32 index; 1349 int ret; 1350 1351 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1352 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1353 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1354 return -EINVAL; 1355 1356 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1357 device = ib_device_get_by_index(sock_net(skb->sk), index); 1358 if (!device) 1359 return -EINVAL; 1360 1361 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1362 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1363 1364 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1365 if (!msg) { 1366 ret = -ENOMEM; 1367 goto err; 1368 } 1369 1370 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1371 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1372 0, 0); 1373 if (!nlh) { 1374 ret = -EMSGSIZE; 1375 goto err_free; 1376 } 1377 1378 ret = fill_res_info(msg, device, show_details); 1379 if (ret) 1380 goto err_free; 1381 1382 nlmsg_end(msg, nlh); 1383 ib_device_put(device); 1384 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1385 1386 err_free: 1387 nlmsg_free(msg); 1388 err: 1389 ib_device_put(device); 1390 return ret; 1391 } 1392 1393 static int _nldev_res_get_dumpit(struct ib_device *device, 1394 struct sk_buff *skb, 1395 struct netlink_callback *cb, 1396 unsigned int idx) 1397 { 1398 int start = cb->args[0]; 1399 struct nlmsghdr *nlh; 1400 1401 if (idx < start) 1402 return 0; 1403 1404 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1405 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1406 0, NLM_F_MULTI); 1407 1408 if (!nlh || fill_res_info(skb, device, false)) { 1409 nlmsg_cancel(skb, nlh); 1410 goto out; 1411 } 1412 nlmsg_end(skb, nlh); 1413 1414 idx++; 1415 1416 out: 1417 cb->args[0] = idx; 1418 return skb->len; 1419 } 1420 1421 static int nldev_res_get_dumpit(struct sk_buff *skb, 1422 struct netlink_callback *cb) 1423 { 1424 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); 1425 } 1426 1427 struct nldev_fill_res_entry { 1428 enum rdma_nldev_attr nldev_attr; 1429 u8 flags; 1430 u32 entry; 1431 u32 id; 1432 }; 1433 1434 enum nldev_res_flags { 1435 NLDEV_PER_DEV = 1 << 0, 1436 }; 1437 1438 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { 1439 [RDMA_RESTRACK_QP] = { 1440 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, 1441 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, 1442 .id = RDMA_NLDEV_ATTR_RES_LQPN, 1443 }, 1444 [RDMA_RESTRACK_CM_ID] = { 1445 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, 1446 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, 1447 .id = RDMA_NLDEV_ATTR_RES_CM_IDN, 1448 }, 1449 [RDMA_RESTRACK_CQ] = { 1450 .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, 1451 .flags = NLDEV_PER_DEV, 1452 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, 1453 .id = RDMA_NLDEV_ATTR_RES_CQN, 1454 }, 1455 [RDMA_RESTRACK_MR] = { 1456 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, 1457 .flags = NLDEV_PER_DEV, 1458 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, 1459 .id = RDMA_NLDEV_ATTR_RES_MRN, 1460 }, 1461 [RDMA_RESTRACK_PD] = { 1462 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, 1463 .flags = NLDEV_PER_DEV, 1464 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, 1465 .id = RDMA_NLDEV_ATTR_RES_PDN, 1466 }, 1467 [RDMA_RESTRACK_COUNTER] = { 1468 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, 1469 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, 1470 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, 1471 }, 1472 [RDMA_RESTRACK_CTX] = { 1473 .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX, 1474 .flags = NLDEV_PER_DEV, 1475 .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY, 1476 .id = RDMA_NLDEV_ATTR_RES_CTXN, 1477 }, 1478 [RDMA_RESTRACK_SRQ] = { 1479 .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ, 1480 .flags = NLDEV_PER_DEV, 1481 .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, 1482 .id = RDMA_NLDEV_ATTR_RES_SRQN, 1483 }, 1484 1485 }; 1486 1487 static noinline_for_stack int 1488 res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1489 struct netlink_ext_ack *extack, 1490 enum rdma_restrack_type res_type, 1491 res_fill_func_t fill_func) 1492 { 1493 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1494 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1495 struct rdma_restrack_entry *res; 1496 struct ib_device *device; 1497 u32 index, id, port = 0; 1498 bool has_cap_net_admin; 1499 struct sk_buff *msg; 1500 int ret; 1501 1502 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1503 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1504 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) 1505 return -EINVAL; 1506 1507 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1508 device = ib_device_get_by_index(sock_net(skb->sk), index); 1509 if (!device) 1510 return -EINVAL; 1511 1512 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1513 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1514 if (!rdma_is_port_valid(device, port)) { 1515 ret = -EINVAL; 1516 goto err; 1517 } 1518 } 1519 1520 if ((port && fe->flags & NLDEV_PER_DEV) || 1521 (!port && ~fe->flags & NLDEV_PER_DEV)) { 1522 ret = -EINVAL; 1523 goto err; 1524 } 1525 1526 id = nla_get_u32(tb[fe->id]); 1527 res = rdma_restrack_get_byid(device, res_type, id); 1528 if (IS_ERR(res)) { 1529 ret = PTR_ERR(res); 1530 goto err; 1531 } 1532 1533 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1534 if (!msg) { 1535 ret = -ENOMEM; 1536 goto err_get; 1537 } 1538 1539 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1540 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1541 RDMA_NL_GET_OP(nlh->nlmsg_type)), 1542 0, 0); 1543 1544 if (!nlh || fill_nldev_handle(msg, device)) { 1545 ret = -EMSGSIZE; 1546 goto err_free; 1547 } 1548 1549 has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); 1550 1551 ret = fill_func(msg, has_cap_net_admin, res, port); 1552 if (ret) 1553 goto err_free; 1554 1555 rdma_restrack_put(res); 1556 nlmsg_end(msg, nlh); 1557 ib_device_put(device); 1558 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1559 1560 err_free: 1561 nlmsg_free(msg); 1562 err_get: 1563 rdma_restrack_put(res); 1564 err: 1565 ib_device_put(device); 1566 return ret; 1567 } 1568 1569 static int res_get_common_dumpit(struct sk_buff *skb, 1570 struct netlink_callback *cb, 1571 enum rdma_restrack_type res_type, 1572 res_fill_func_t fill_func) 1573 { 1574 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1575 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1576 struct rdma_restrack_entry *res; 1577 struct rdma_restrack_root *rt; 1578 int err, ret = 0, idx = 0; 1579 bool show_details = false; 1580 struct nlattr *table_attr; 1581 struct nlattr *entry_attr; 1582 struct ib_device *device; 1583 int start = cb->args[0]; 1584 bool has_cap_net_admin; 1585 struct nlmsghdr *nlh; 1586 unsigned long id; 1587 u32 index, port = 0; 1588 bool filled = false; 1589 1590 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1591 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1592 /* 1593 * Right now, we are expecting the device index to get res information, 1594 * but it is possible to extend this code to return all devices in 1595 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. 1596 * if it doesn't exist, we will iterate over all devices. 1597 * 1598 * But it is not needed for now. 1599 */ 1600 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1601 return -EINVAL; 1602 1603 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1604 device = ib_device_get_by_index(sock_net(skb->sk), index); 1605 if (!device) 1606 return -EINVAL; 1607 1608 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1609 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1610 1611 /* 1612 * If no PORT_INDEX is supplied, we will return all QPs from that device 1613 */ 1614 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1615 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1616 if (!rdma_is_port_valid(device, port)) { 1617 ret = -EINVAL; 1618 goto err_index; 1619 } 1620 } 1621 1622 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1623 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1624 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 1625 0, NLM_F_MULTI); 1626 1627 if (!nlh || fill_nldev_handle(skb, device)) { 1628 ret = -EMSGSIZE; 1629 goto err; 1630 } 1631 1632 table_attr = nla_nest_start_noflag(skb, fe->nldev_attr); 1633 if (!table_attr) { 1634 ret = -EMSGSIZE; 1635 goto err; 1636 } 1637 1638 has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); 1639 1640 rt = &device->res[res_type]; 1641 xa_lock(&rt->xa); 1642 /* 1643 * FIXME: if the skip ahead is something common this loop should 1644 * use xas_for_each & xas_pause to optimize, we can have a lot of 1645 * objects. 1646 */ 1647 xa_for_each(&rt->xa, id, res) { 1648 if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) 1649 goto next; 1650 1651 if (idx < start || !rdma_restrack_get(res)) 1652 goto next; 1653 1654 xa_unlock(&rt->xa); 1655 1656 filled = true; 1657 1658 entry_attr = nla_nest_start_noflag(skb, fe->entry); 1659 if (!entry_attr) { 1660 ret = -EMSGSIZE; 1661 rdma_restrack_put(res); 1662 goto msg_full; 1663 } 1664 1665 ret = fill_func(skb, has_cap_net_admin, res, port); 1666 1667 rdma_restrack_put(res); 1668 1669 if (ret) { 1670 nla_nest_cancel(skb, entry_attr); 1671 if (ret == -EMSGSIZE) 1672 goto msg_full; 1673 if (ret == -EAGAIN) 1674 goto again; 1675 goto res_err; 1676 } 1677 nla_nest_end(skb, entry_attr); 1678 again: xa_lock(&rt->xa); 1679 next: idx++; 1680 } 1681 xa_unlock(&rt->xa); 1682 1683 msg_full: 1684 nla_nest_end(skb, table_attr); 1685 nlmsg_end(skb, nlh); 1686 cb->args[0] = idx; 1687 1688 /* 1689 * No more entries to fill, cancel the message and 1690 * return 0 to mark end of dumpit. 1691 */ 1692 if (!filled) 1693 goto err; 1694 1695 ib_device_put(device); 1696 return skb->len; 1697 1698 res_err: 1699 nla_nest_cancel(skb, table_attr); 1700 1701 err: 1702 nlmsg_cancel(skb, nlh); 1703 1704 err_index: 1705 ib_device_put(device); 1706 return ret; 1707 } 1708 1709 #define RES_GET_FUNCS(name, type) \ 1710 static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ 1711 struct netlink_callback *cb) \ 1712 { \ 1713 return res_get_common_dumpit(skb, cb, type, \ 1714 fill_res_##name##_entry); \ 1715 } \ 1716 static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ 1717 struct nlmsghdr *nlh, \ 1718 struct netlink_ext_ack *extack) \ 1719 { \ 1720 return res_get_common_doit(skb, nlh, extack, type, \ 1721 fill_res_##name##_entry); \ 1722 } 1723 1724 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); 1725 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); 1726 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); 1727 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); 1728 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); 1729 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); 1730 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); 1731 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); 1732 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); 1733 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX); 1734 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ); 1735 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ); 1736 1737 static LIST_HEAD(link_ops); 1738 static DECLARE_RWSEM(link_ops_rwsem); 1739 1740 static const struct rdma_link_ops *link_ops_get(const char *type) 1741 { 1742 const struct rdma_link_ops *ops; 1743 1744 list_for_each_entry(ops, &link_ops, list) { 1745 if (!strcmp(ops->type, type)) 1746 goto out; 1747 } 1748 ops = NULL; 1749 out: 1750 return ops; 1751 } 1752 1753 void rdma_link_register(struct rdma_link_ops *ops) 1754 { 1755 down_write(&link_ops_rwsem); 1756 if (WARN_ON_ONCE(link_ops_get(ops->type))) 1757 goto out; 1758 list_add(&ops->list, &link_ops); 1759 out: 1760 up_write(&link_ops_rwsem); 1761 } 1762 EXPORT_SYMBOL(rdma_link_register); 1763 1764 void rdma_link_unregister(struct rdma_link_ops *ops) 1765 { 1766 down_write(&link_ops_rwsem); 1767 list_del(&ops->list); 1768 up_write(&link_ops_rwsem); 1769 } 1770 EXPORT_SYMBOL(rdma_link_unregister); 1771 1772 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, 1773 struct netlink_ext_ack *extack) 1774 { 1775 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1776 char ibdev_name[IB_DEVICE_NAME_MAX]; 1777 const struct rdma_link_ops *ops; 1778 char ndev_name[IFNAMSIZ]; 1779 struct net_device *ndev; 1780 char type[IFNAMSIZ]; 1781 int err; 1782 1783 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1784 nldev_policy, extack); 1785 if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || 1786 !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) 1787 return -EINVAL; 1788 1789 nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1790 sizeof(ibdev_name)); 1791 if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) 1792 return -EINVAL; 1793 1794 nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); 1795 nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], 1796 sizeof(ndev_name)); 1797 1798 ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); 1799 if (!ndev) 1800 return -ENODEV; 1801 1802 down_read(&link_ops_rwsem); 1803 ops = link_ops_get(type); 1804 #ifdef CONFIG_MODULES 1805 if (!ops) { 1806 up_read(&link_ops_rwsem); 1807 request_module("rdma-link-%s", type); 1808 down_read(&link_ops_rwsem); 1809 ops = link_ops_get(type); 1810 } 1811 #endif 1812 err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL; 1813 up_read(&link_ops_rwsem); 1814 dev_put(ndev); 1815 1816 return err; 1817 } 1818 1819 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 1820 struct netlink_ext_ack *extack) 1821 { 1822 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1823 struct ib_device *device; 1824 u32 index; 1825 int err; 1826 1827 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1828 nldev_policy, extack); 1829 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1830 return -EINVAL; 1831 1832 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1833 device = ib_device_get_by_index(sock_net(skb->sk), index); 1834 if (!device) 1835 return -EINVAL; 1836 1837 if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { 1838 ib_device_put(device); 1839 return -EINVAL; 1840 } 1841 1842 /* 1843 * This path is triggered by the 'rdma link delete' administrative command. 1844 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here. 1845 * Note: iWARP driver does not implement .dellink, so this logic is 1846 * implicitly scoped to the driver supporting dynamic link deletion like RXE. 1847 */ 1848 if (device->link_ops && device->link_ops->dellink) { 1849 err = device->link_ops->dellink(device); 1850 if (err) 1851 return err; 1852 } 1853 1854 ib_unregister_device_and_put(device); 1855 return 0; 1856 } 1857 1858 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, 1859 struct netlink_ext_ack *extack) 1860 { 1861 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1862 char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; 1863 struct ib_client_nl_info data = {}; 1864 struct ib_device *ibdev = NULL; 1865 struct sk_buff *msg; 1866 u32 index; 1867 int err; 1868 1869 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 1870 NL_VALIDATE_LIBERAL, extack); 1871 if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) 1872 return -EINVAL; 1873 1874 nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], 1875 sizeof(client_name)); 1876 1877 if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 1878 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1879 ibdev = ib_device_get_by_index(sock_net(skb->sk), index); 1880 if (!ibdev) 1881 return -EINVAL; 1882 1883 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1884 data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1885 if (!rdma_is_port_valid(ibdev, data.port)) { 1886 err = -EINVAL; 1887 goto out_put; 1888 } 1889 } else { 1890 data.port = -1; 1891 } 1892 } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1893 return -EINVAL; 1894 } 1895 1896 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1897 if (!msg) { 1898 err = -ENOMEM; 1899 goto out_put; 1900 } 1901 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1902 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1903 RDMA_NLDEV_CMD_GET_CHARDEV), 1904 0, 0); 1905 if (!nlh) { 1906 err = -EMSGSIZE; 1907 goto out_nlmsg; 1908 } 1909 1910 data.nl_msg = msg; 1911 err = ib_get_client_nl_info(ibdev, client_name, &data); 1912 if (err) 1913 goto out_nlmsg; 1914 1915 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV, 1916 huge_encode_dev(data.cdev->devt), 1917 RDMA_NLDEV_ATTR_PAD); 1918 if (err) 1919 goto out_data; 1920 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, 1921 RDMA_NLDEV_ATTR_PAD); 1922 if (err) 1923 goto out_data; 1924 if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, 1925 dev_name(data.cdev))) { 1926 err = -EMSGSIZE; 1927 goto out_data; 1928 } 1929 1930 nlmsg_end(msg, nlh); 1931 put_device(data.cdev); 1932 if (ibdev) 1933 ib_device_put(ibdev); 1934 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1935 1936 out_data: 1937 put_device(data.cdev); 1938 out_nlmsg: 1939 nlmsg_free(msg); 1940 out_put: 1941 if (ibdev) 1942 ib_device_put(ibdev); 1943 return err; 1944 } 1945 1946 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1947 struct netlink_ext_ack *extack) 1948 { 1949 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1950 struct sk_buff *msg; 1951 int err; 1952 1953 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1954 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1955 if (err) 1956 return err; 1957 1958 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1959 if (!msg) 1960 return -ENOMEM; 1961 1962 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1963 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1964 RDMA_NLDEV_CMD_SYS_GET), 1965 0, 0); 1966 if (!nlh) { 1967 nlmsg_free(msg); 1968 return -EMSGSIZE; 1969 } 1970 1971 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, 1972 (u8)ib_devices_shared_netns); 1973 if (err) { 1974 nlmsg_free(msg); 1975 return err; 1976 } 1977 1978 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE, 1979 (u8)privileged_qkey); 1980 if (err) { 1981 nlmsg_free(msg); 1982 return err; 1983 } 1984 1985 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1); 1986 if (err) { 1987 nlmsg_free(msg); 1988 return err; 1989 } 1990 /* 1991 * Copy-on-fork is supported. 1992 * See commits: 1993 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") 1994 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm") 1995 * for more details. Don't backport this without them. 1996 * 1997 * Return value ignored on purpose, assume copy-on-fork is not 1998 * supported in case of failure. 1999 */ 2000 nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1); 2001 2002 nlmsg_end(msg, nlh); 2003 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2004 } 2005 2006 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[]) 2007 { 2008 u8 enable; 2009 int err; 2010 2011 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); 2012 /* Only 0 and 1 are supported */ 2013 if (enable > 1) 2014 return -EINVAL; 2015 2016 err = rdma_compatdev_set(enable); 2017 return err; 2018 } 2019 2020 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[]) 2021 { 2022 u8 enable; 2023 2024 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]); 2025 /* Only 0 and 1 are supported */ 2026 if (enable > 1) 2027 return -EINVAL; 2028 2029 privileged_qkey = enable; 2030 return 0; 2031 } 2032 2033 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2034 struct netlink_ext_ack *extack) 2035 { 2036 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2037 int err; 2038 2039 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2040 nldev_policy, extack); 2041 if (err) 2042 return -EINVAL; 2043 2044 if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) 2045 return nldev_set_sys_set_netns_doit(tb); 2046 2047 if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]) 2048 return nldev_set_sys_set_pqkey_doit(tb); 2049 2050 return -EINVAL; 2051 } 2052 2053 2054 static int nldev_stat_set_mode_doit(struct sk_buff *msg, 2055 struct netlink_ext_ack *extack, 2056 struct nlattr *tb[], 2057 struct ib_device *device, u32 port) 2058 { 2059 u32 mode, mask = 0, qpn, cntn = 0; 2060 bool opcnt = false; 2061 int ret; 2062 2063 /* Currently only counter for QP is supported */ 2064 if (!tb[RDMA_NLDEV_ATTR_STAT_RES] || 2065 nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2066 return -EINVAL; 2067 2068 if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]) 2069 opcnt = !!nla_get_u8( 2070 tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]); 2071 2072 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); 2073 if (mode == RDMA_COUNTER_MODE_AUTO) { 2074 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) 2075 mask = nla_get_u32( 2076 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); 2077 return rdma_counter_set_auto_mode(device, port, mask, opcnt, 2078 extack); 2079 } 2080 2081 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2082 return -EINVAL; 2083 2084 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2085 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { 2086 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2087 ret = rdma_counter_bind_qpn(device, port, qpn, cntn); 2088 if (ret) 2089 return ret; 2090 } else { 2091 ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); 2092 if (ret) 2093 return ret; 2094 } 2095 2096 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2097 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2098 ret = -EMSGSIZE; 2099 goto err_fill; 2100 } 2101 2102 return 0; 2103 2104 err_fill: 2105 rdma_counter_unbind_qpn(device, port, qpn, cntn); 2106 return ret; 2107 } 2108 2109 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], 2110 struct ib_device *device, 2111 u32 port) 2112 { 2113 struct rdma_hw_stats *stats; 2114 struct nlattr *entry_attr; 2115 unsigned long *target; 2116 int rem, i, ret = 0; 2117 u32 index; 2118 2119 stats = ib_get_hw_stats_port(device, port); 2120 if (!stats) 2121 return -EINVAL; 2122 2123 target = kcalloc(BITS_TO_LONGS(stats->num_counters), 2124 sizeof(*stats->is_disabled), GFP_KERNEL); 2125 if (!target) 2126 return -ENOMEM; 2127 2128 nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], 2129 rem) { 2130 index = nla_get_u32(entry_attr); 2131 if ((index >= stats->num_counters) || 2132 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { 2133 ret = -EINVAL; 2134 goto out; 2135 } 2136 2137 set_bit(index, target); 2138 } 2139 2140 for (i = 0; i < stats->num_counters; i++) { 2141 if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) 2142 continue; 2143 2144 ret = rdma_counter_modify(device, port, i, test_bit(i, target)); 2145 if (ret) 2146 goto out; 2147 } 2148 2149 out: 2150 kfree(target); 2151 return ret; 2152 } 2153 2154 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2155 struct netlink_ext_ack *extack) 2156 { 2157 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2158 struct ib_device *device; 2159 struct sk_buff *msg; 2160 u32 index, port; 2161 int ret; 2162 2163 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2164 extack); 2165 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2166 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2167 return -EINVAL; 2168 2169 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2170 device = ib_device_get_by_index(sock_net(skb->sk), index); 2171 if (!device) 2172 return -EINVAL; 2173 2174 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2175 if (!rdma_is_port_valid(device, port)) { 2176 ret = -EINVAL; 2177 goto err_put_device; 2178 } 2179 2180 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && 2181 !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2182 ret = -EINVAL; 2183 goto err_put_device; 2184 } 2185 2186 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2187 if (!msg) { 2188 ret = -ENOMEM; 2189 goto err_put_device; 2190 } 2191 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2192 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2193 RDMA_NLDEV_CMD_STAT_SET), 2194 0, 0); 2195 if (!nlh || fill_nldev_handle(msg, device) || 2196 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2197 ret = -EMSGSIZE; 2198 goto err_free_msg; 2199 } 2200 2201 if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { 2202 ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); 2203 if (ret) 2204 goto err_free_msg; 2205 } 2206 2207 if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2208 ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); 2209 if (ret) 2210 goto err_free_msg; 2211 } 2212 2213 nlmsg_end(msg, nlh); 2214 ib_device_put(device); 2215 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2216 2217 err_free_msg: 2218 nlmsg_free(msg); 2219 err_put_device: 2220 ib_device_put(device); 2221 return ret; 2222 } 2223 2224 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2225 struct netlink_ext_ack *extack) 2226 { 2227 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2228 struct ib_device *device; 2229 struct sk_buff *msg; 2230 u32 index, port, qpn, cntn; 2231 int ret; 2232 2233 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2234 nldev_policy, extack); 2235 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || 2236 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || 2237 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] || 2238 !tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2239 return -EINVAL; 2240 2241 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2242 return -EINVAL; 2243 2244 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2245 device = ib_device_get_by_index(sock_net(skb->sk), index); 2246 if (!device) 2247 return -EINVAL; 2248 2249 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2250 if (!rdma_is_port_valid(device, port)) { 2251 ret = -EINVAL; 2252 goto err; 2253 } 2254 2255 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2256 if (!msg) { 2257 ret = -ENOMEM; 2258 goto err; 2259 } 2260 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2261 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2262 RDMA_NLDEV_CMD_STAT_SET), 2263 0, 0); 2264 if (!nlh) { 2265 ret = -EMSGSIZE; 2266 goto err_fill; 2267 } 2268 2269 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2270 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2271 if (fill_nldev_handle(msg, device) || 2272 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2273 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2274 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2275 ret = -EMSGSIZE; 2276 goto err_fill; 2277 } 2278 2279 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); 2280 if (ret) 2281 goto err_fill; 2282 2283 nlmsg_end(msg, nlh); 2284 ib_device_put(device); 2285 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2286 2287 err_fill: 2288 nlmsg_free(msg); 2289 err: 2290 ib_device_put(device); 2291 return ret; 2292 } 2293 2294 static noinline_for_stack int 2295 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, 2296 struct netlink_ext_ack *extack, 2297 struct nlattr *tb[]) 2298 { 2299 struct rdma_hw_stats *stats; 2300 struct nlattr *table_attr; 2301 struct ib_device *device; 2302 int ret, num_cnts, i; 2303 struct sk_buff *msg; 2304 u32 index, port; 2305 u64 v; 2306 2307 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2308 return -EINVAL; 2309 2310 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2311 device = ib_device_get_by_index(sock_net(skb->sk), index); 2312 if (!device) 2313 return -EINVAL; 2314 2315 if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { 2316 ret = -EINVAL; 2317 goto err; 2318 } 2319 2320 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2321 stats = ib_get_hw_stats_port(device, port); 2322 if (!stats) { 2323 ret = -EINVAL; 2324 goto err; 2325 } 2326 2327 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2328 if (!msg) { 2329 ret = -ENOMEM; 2330 goto err; 2331 } 2332 2333 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2334 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2335 RDMA_NLDEV_CMD_STAT_GET), 2336 0, 0); 2337 2338 if (!nlh || fill_nldev_handle(msg, device) || 2339 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2340 ret = -EMSGSIZE; 2341 goto err_msg; 2342 } 2343 2344 mutex_lock(&stats->lock); 2345 2346 num_cnts = device->ops.get_hw_stats(device, stats, port, 0); 2347 if (num_cnts < 0) { 2348 ret = -EINVAL; 2349 goto err_stats; 2350 } 2351 2352 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2353 if (!table_attr) { 2354 ret = -EMSGSIZE; 2355 goto err_stats; 2356 } 2357 for (i = 0; i < num_cnts; i++) { 2358 if (test_bit(i, stats->is_disabled)) 2359 continue; 2360 2361 v = stats->value[i] + 2362 rdma_counter_get_hwstat_value(device, port, i); 2363 if (rdma_nl_stat_hwcounter_entry(msg, 2364 stats->descs[i].name, v)) { 2365 ret = -EMSGSIZE; 2366 goto err_table; 2367 } 2368 } 2369 nla_nest_end(msg, table_attr); 2370 2371 mutex_unlock(&stats->lock); 2372 nlmsg_end(msg, nlh); 2373 ib_device_put(device); 2374 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2375 2376 err_table: 2377 nla_nest_cancel(msg, table_attr); 2378 err_stats: 2379 mutex_unlock(&stats->lock); 2380 err_msg: 2381 nlmsg_free(msg); 2382 err: 2383 ib_device_put(device); 2384 return ret; 2385 } 2386 2387 static noinline_for_stack int 2388 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, 2389 struct netlink_ext_ack *extack, struct nlattr *tb[]) 2390 2391 { 2392 static enum rdma_nl_counter_mode mode; 2393 static enum rdma_nl_counter_mask mask; 2394 struct ib_device *device; 2395 struct sk_buff *msg; 2396 u32 index, port; 2397 bool opcnt; 2398 int ret; 2399 2400 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) 2401 return nldev_res_get_counter_doit(skb, nlh, extack); 2402 2403 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] || 2404 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2405 return -EINVAL; 2406 2407 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2408 device = ib_device_get_by_index(sock_net(skb->sk), index); 2409 if (!device) 2410 return -EINVAL; 2411 2412 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2413 if (!rdma_is_port_valid(device, port)) { 2414 ret = -EINVAL; 2415 goto err; 2416 } 2417 2418 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2419 if (!msg) { 2420 ret = -ENOMEM; 2421 goto err; 2422 } 2423 2424 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2425 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2426 RDMA_NLDEV_CMD_STAT_GET), 2427 0, 0); 2428 if (!nlh) { 2429 ret = -EMSGSIZE; 2430 goto err_msg; 2431 } 2432 2433 ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt); 2434 if (ret) 2435 goto err_msg; 2436 2437 if (fill_nldev_handle(msg, device) || 2438 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2439 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) { 2440 ret = -EMSGSIZE; 2441 goto err_msg; 2442 } 2443 2444 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2445 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) { 2446 ret = -EMSGSIZE; 2447 goto err_msg; 2448 } 2449 2450 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2451 nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) { 2452 ret = -EMSGSIZE; 2453 goto err_msg; 2454 } 2455 2456 nlmsg_end(msg, nlh); 2457 ib_device_put(device); 2458 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2459 2460 err_msg: 2461 nlmsg_free(msg); 2462 err: 2463 ib_device_put(device); 2464 return ret; 2465 } 2466 2467 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2468 struct netlink_ext_ack *extack) 2469 { 2470 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2471 int ret; 2472 2473 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2474 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2475 if (ret) 2476 return -EINVAL; 2477 2478 if (!tb[RDMA_NLDEV_ATTR_STAT_RES]) 2479 return stat_get_doit_default_counter(skb, nlh, extack, tb); 2480 2481 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2482 case RDMA_NLDEV_ATTR_RES_QP: 2483 ret = stat_get_doit_qp(skb, nlh, extack, tb); 2484 break; 2485 case RDMA_NLDEV_ATTR_RES_MR: 2486 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, 2487 fill_stat_mr_entry); 2488 break; 2489 default: 2490 ret = -EINVAL; 2491 break; 2492 } 2493 2494 return ret; 2495 } 2496 2497 static int nldev_stat_get_dumpit(struct sk_buff *skb, 2498 struct netlink_callback *cb) 2499 { 2500 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2501 int ret; 2502 2503 ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2504 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2505 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) 2506 return -EINVAL; 2507 2508 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2509 case RDMA_NLDEV_ATTR_RES_QP: 2510 ret = nldev_res_get_counter_dumpit(skb, cb); 2511 break; 2512 case RDMA_NLDEV_ATTR_RES_MR: 2513 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, 2514 fill_stat_mr_entry); 2515 break; 2516 default: 2517 ret = -EINVAL; 2518 break; 2519 } 2520 2521 return ret; 2522 } 2523 2524 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 2525 struct nlmsghdr *nlh, 2526 struct netlink_ext_ack *extack) 2527 { 2528 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; 2529 struct rdma_hw_stats *stats; 2530 struct ib_device *device; 2531 struct sk_buff *msg; 2532 u32 devid, port; 2533 int ret, i; 2534 2535 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2536 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2537 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2538 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2539 return -EINVAL; 2540 2541 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2542 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2543 if (!device) 2544 return -EINVAL; 2545 2546 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2547 if (!rdma_is_port_valid(device, port)) { 2548 ret = -EINVAL; 2549 goto err; 2550 } 2551 2552 stats = ib_get_hw_stats_port(device, port); 2553 if (!stats) { 2554 ret = -EINVAL; 2555 goto err; 2556 } 2557 2558 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2559 if (!msg) { 2560 ret = -ENOMEM; 2561 goto err; 2562 } 2563 2564 nlh = nlmsg_put( 2565 msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2566 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), 2567 0, 0); 2568 2569 ret = -EMSGSIZE; 2570 if (!nlh || fill_nldev_handle(msg, device) || 2571 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 2572 goto err_msg; 2573 2574 table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2575 if (!table) 2576 goto err_msg; 2577 2578 mutex_lock(&stats->lock); 2579 for (i = 0; i < stats->num_counters; i++) { 2580 entry = nla_nest_start(msg, 2581 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 2582 if (!entry) 2583 goto err_msg_table; 2584 2585 if (nla_put_string(msg, 2586 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 2587 stats->descs[i].name) || 2588 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) 2589 goto err_msg_entry; 2590 2591 if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && 2592 (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, 2593 !test_bit(i, stats->is_disabled)))) 2594 goto err_msg_entry; 2595 2596 nla_nest_end(msg, entry); 2597 } 2598 mutex_unlock(&stats->lock); 2599 2600 nla_nest_end(msg, table); 2601 nlmsg_end(msg, nlh); 2602 ib_device_put(device); 2603 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2604 2605 err_msg_entry: 2606 nla_nest_cancel(msg, entry); 2607 err_msg_table: 2608 mutex_unlock(&stats->lock); 2609 nla_nest_cancel(msg, table); 2610 err_msg: 2611 nlmsg_free(msg); 2612 err: 2613 ib_device_put(device); 2614 return ret; 2615 } 2616 2617 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh, 2618 struct netlink_ext_ack *extack) 2619 { 2620 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2621 enum rdma_nl_dev_type type; 2622 struct ib_device *parent; 2623 char name[IFNAMSIZ] = {}; 2624 u32 parentid; 2625 int ret; 2626 2627 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2628 nldev_policy, extack); 2629 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2630 !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE]) 2631 return -EINVAL; 2632 2633 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name)); 2634 type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]); 2635 parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2636 parent = ib_device_get_by_index(sock_net(skb->sk), parentid); 2637 if (!parent) 2638 return -EINVAL; 2639 2640 ret = ib_add_sub_device(parent, type, name); 2641 ib_device_put(parent); 2642 2643 return ret; 2644 } 2645 2646 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, 2647 struct netlink_ext_ack *extack) 2648 { 2649 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2650 struct ib_device *device; 2651 u32 devid; 2652 int ret; 2653 2654 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2655 nldev_policy, extack); 2656 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2657 return -EINVAL; 2658 2659 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2660 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2661 if (!device) 2662 return -EINVAL; 2663 2664 return ib_del_sub_device_and_put(device); 2665 } 2666 2667 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key) 2668 { 2669 struct nlattr *key_attr; 2670 2671 key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY); 2672 if (!key_attr) 2673 return -EMSGSIZE; 2674 2675 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats)) 2676 goto err; 2677 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS, 2678 key->access_flags)) 2679 goto err; 2680 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY, 2681 key->vendor_key, RDMA_NLDEV_ATTR_PAD)) 2682 goto err; 2683 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS, 2684 key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD)) 2685 goto err; 2686 2687 if (key->kernel_vendor_key && 2688 nla_put_u64_64bit(msg, 2689 RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY, 2690 key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD)) 2691 goto err; 2692 2693 nla_nest_end(msg, key_attr); 2694 return 0; 2695 2696 err: 2697 return -EMSGSIZE; 2698 } 2699 2700 static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool) 2701 { 2702 if (fill_frmr_pool_key(msg, &pool->key)) 2703 return -EMSGSIZE; 2704 2705 spin_lock(&pool->lock); 2706 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES, 2707 pool->queue.ci + pool->inactive_queue.ci)) 2708 goto err_unlock; 2709 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE, 2710 pool->max_in_use, RDMA_NLDEV_ATTR_PAD)) 2711 goto err_unlock; 2712 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE, 2713 pool->in_use, RDMA_NLDEV_ATTR_PAD)) 2714 goto err_unlock; 2715 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES, 2716 pool->pinned_handles)) 2717 goto err_unlock; 2718 spin_unlock(&pool->lock); 2719 2720 return 0; 2721 2722 err_unlock: 2723 spin_unlock(&pool->lock); 2724 return -EMSGSIZE; 2725 } 2726 2727 static int nldev_frmr_pools_parse_key(struct nlattr *tb[], 2728 struct ib_frmr_key *key, 2729 struct netlink_ext_ack *extack) 2730 { 2731 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]) 2732 key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]); 2733 2734 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]) 2735 key->access_flags = nla_get_u32( 2736 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]); 2737 2738 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]) 2739 key->vendor_key = nla_get_u64( 2740 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]); 2741 2742 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]) 2743 key->num_dma_blocks = nla_get_u64( 2744 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]); 2745 2746 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY]) 2747 return -EINVAL; 2748 2749 return 0; 2750 } 2751 2752 static int nldev_frmr_pools_set_pinned(struct ib_device *device, 2753 struct nlattr *tb[], 2754 struct netlink_ext_ack *extack) 2755 { 2756 struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX]; 2757 struct ib_frmr_key key = { 0 }; 2758 u32 pinned_handles = 0; 2759 int err = 0; 2760 2761 pinned_handles = 2762 nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]); 2763 2764 if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]) 2765 return -EINVAL; 2766 2767 err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1, 2768 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy, 2769 extack); 2770 if (err) 2771 return err; 2772 2773 err = nldev_frmr_pools_parse_key(key_tb, &key, extack); 2774 if (err) 2775 return err; 2776 2777 err = ib_frmr_pools_set_pinned(device, &key, pinned_handles); 2778 2779 return err; 2780 } 2781 2782 static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb, 2783 struct netlink_callback *cb) 2784 { 2785 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2786 struct ib_frmr_pools *pools; 2787 int err, ret = 0, idx = 0; 2788 struct ib_frmr_pool *pool; 2789 struct nlattr *table_attr; 2790 struct nlattr *entry_attr; 2791 bool show_details = false; 2792 struct ib_device *device; 2793 int start = cb->args[0]; 2794 struct rb_node *node; 2795 struct nlmsghdr *nlh; 2796 bool filled = false; 2797 2798 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2799 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2800 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2801 return -EINVAL; 2802 2803 device = ib_device_get_by_index( 2804 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2805 if (!device) 2806 return -EINVAL; 2807 2808 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 2809 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 2810 2811 pools = device->frmr_pools; 2812 if (!pools) { 2813 ib_device_put(device); 2814 return 0; 2815 } 2816 2817 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2818 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2819 RDMA_NLDEV_CMD_FRMR_POOLS_GET), 2820 0, NLM_F_MULTI); 2821 2822 if (!nlh || fill_nldev_handle(skb, device)) { 2823 ret = -EMSGSIZE; 2824 goto err; 2825 } 2826 2827 table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS); 2828 if (!table_attr) { 2829 ret = -EMSGSIZE; 2830 goto err; 2831 } 2832 2833 read_lock(&pools->rb_lock); 2834 for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) { 2835 pool = rb_entry(node, struct ib_frmr_pool, node); 2836 if (pool->key.kernel_vendor_key && !show_details) 2837 continue; 2838 2839 if (idx < start) { 2840 idx++; 2841 continue; 2842 } 2843 2844 filled = true; 2845 2846 entry_attr = nla_nest_start_noflag( 2847 skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY); 2848 if (!entry_attr) { 2849 ret = -EMSGSIZE; 2850 goto end_msg; 2851 } 2852 2853 if (fill_frmr_pool_entry(skb, pool)) { 2854 nla_nest_cancel(skb, entry_attr); 2855 ret = -EMSGSIZE; 2856 goto end_msg; 2857 } 2858 2859 nla_nest_end(skb, entry_attr); 2860 idx++; 2861 } 2862 end_msg: 2863 read_unlock(&pools->rb_lock); 2864 2865 nla_nest_end(skb, table_attr); 2866 nlmsg_end(skb, nlh); 2867 cb->args[0] = idx; 2868 2869 /* 2870 * No more entries to fill, cancel the message and 2871 * return 0 to mark end of dumpit. 2872 */ 2873 if (!filled) 2874 goto err; 2875 2876 ib_device_put(device); 2877 return skb->len; 2878 2879 err: 2880 nlmsg_cancel(skb, nlh); 2881 ib_device_put(device); 2882 return ret; 2883 } 2884 2885 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2886 struct netlink_ext_ack *extack) 2887 { 2888 struct ib_device *device; 2889 struct nlattr **tb; 2890 u32 aging_period; 2891 int err; 2892 2893 tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL); 2894 if (!tb) 2895 return -ENOMEM; 2896 2897 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2898 extack); 2899 if (err) 2900 goto free_tb; 2901 2902 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 2903 err = -EINVAL; 2904 goto free_tb; 2905 } 2906 2907 device = ib_device_get_by_index( 2908 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2909 if (!device) { 2910 err = -EINVAL; 2911 goto free_tb; 2912 } 2913 2914 if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) { 2915 aging_period = nla_get_u32( 2916 tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]); 2917 err = ib_frmr_pools_set_aging_period(device, aging_period); 2918 goto done; 2919 } 2920 2921 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]) 2922 err = nldev_frmr_pools_set_pinned(device, tb, extack); 2923 2924 done: 2925 ib_device_put(device); 2926 free_tb: 2927 kfree(tb); 2928 return err; 2929 } 2930 2931 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 2932 [RDMA_NLDEV_CMD_GET] = { 2933 .doit = nldev_get_doit, 2934 .dump = nldev_get_dumpit, 2935 }, 2936 [RDMA_NLDEV_CMD_GET_CHARDEV] = { 2937 .doit = nldev_get_chardev, 2938 }, 2939 [RDMA_NLDEV_CMD_SET] = { 2940 .doit = nldev_set_doit, 2941 .flags = RDMA_NL_ADMIN_PERM, 2942 }, 2943 [RDMA_NLDEV_CMD_NEWLINK] = { 2944 .doit = nldev_newlink, 2945 .flags = RDMA_NL_ADMIN_PERM, 2946 }, 2947 [RDMA_NLDEV_CMD_DELLINK] = { 2948 .doit = nldev_dellink, 2949 .flags = RDMA_NL_ADMIN_PERM, 2950 }, 2951 [RDMA_NLDEV_CMD_PORT_GET] = { 2952 .doit = nldev_port_get_doit, 2953 .dump = nldev_port_get_dumpit, 2954 }, 2955 [RDMA_NLDEV_CMD_RES_GET] = { 2956 .doit = nldev_res_get_doit, 2957 .dump = nldev_res_get_dumpit, 2958 }, 2959 [RDMA_NLDEV_CMD_RES_QP_GET] = { 2960 .doit = nldev_res_get_qp_doit, 2961 .dump = nldev_res_get_qp_dumpit, 2962 }, 2963 [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { 2964 .doit = nldev_res_get_cm_id_doit, 2965 .dump = nldev_res_get_cm_id_dumpit, 2966 }, 2967 [RDMA_NLDEV_CMD_RES_CQ_GET] = { 2968 .doit = nldev_res_get_cq_doit, 2969 .dump = nldev_res_get_cq_dumpit, 2970 }, 2971 [RDMA_NLDEV_CMD_RES_MR_GET] = { 2972 .doit = nldev_res_get_mr_doit, 2973 .dump = nldev_res_get_mr_dumpit, 2974 }, 2975 [RDMA_NLDEV_CMD_RES_PD_GET] = { 2976 .doit = nldev_res_get_pd_doit, 2977 .dump = nldev_res_get_pd_dumpit, 2978 }, 2979 [RDMA_NLDEV_CMD_RES_CTX_GET] = { 2980 .doit = nldev_res_get_ctx_doit, 2981 .dump = nldev_res_get_ctx_dumpit, 2982 }, 2983 [RDMA_NLDEV_CMD_RES_SRQ_GET] = { 2984 .doit = nldev_res_get_srq_doit, 2985 .dump = nldev_res_get_srq_dumpit, 2986 }, 2987 [RDMA_NLDEV_CMD_SYS_GET] = { 2988 .doit = nldev_sys_get_doit, 2989 }, 2990 [RDMA_NLDEV_CMD_SYS_SET] = { 2991 .doit = nldev_set_sys_set_doit, 2992 .flags = RDMA_NL_ADMIN_PERM, 2993 }, 2994 [RDMA_NLDEV_CMD_STAT_SET] = { 2995 .doit = nldev_stat_set_doit, 2996 .flags = RDMA_NL_ADMIN_PERM, 2997 }, 2998 [RDMA_NLDEV_CMD_STAT_GET] = { 2999 .doit = nldev_stat_get_doit, 3000 .dump = nldev_stat_get_dumpit, 3001 }, 3002 [RDMA_NLDEV_CMD_STAT_DEL] = { 3003 .doit = nldev_stat_del_doit, 3004 .flags = RDMA_NL_ADMIN_PERM, 3005 }, 3006 [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { 3007 .doit = nldev_res_get_qp_raw_doit, 3008 .dump = nldev_res_get_qp_raw_dumpit, 3009 .flags = RDMA_NL_ADMIN_PERM, 3010 }, 3011 [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { 3012 .doit = nldev_res_get_cq_raw_doit, 3013 .dump = nldev_res_get_cq_raw_dumpit, 3014 .flags = RDMA_NL_ADMIN_PERM, 3015 }, 3016 [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { 3017 .doit = nldev_res_get_mr_raw_doit, 3018 .dump = nldev_res_get_mr_raw_dumpit, 3019 .flags = RDMA_NL_ADMIN_PERM, 3020 }, 3021 [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = { 3022 .doit = nldev_res_get_srq_raw_doit, 3023 .dump = nldev_res_get_srq_raw_dumpit, 3024 .flags = RDMA_NL_ADMIN_PERM, 3025 }, 3026 [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { 3027 .doit = nldev_stat_get_counter_status_doit, 3028 }, 3029 [RDMA_NLDEV_CMD_NEWDEV] = { 3030 .doit = nldev_newdev, 3031 .flags = RDMA_NL_ADMIN_PERM, 3032 }, 3033 [RDMA_NLDEV_CMD_DELDEV] = { 3034 .doit = nldev_deldev, 3035 .flags = RDMA_NL_ADMIN_PERM, 3036 }, 3037 [RDMA_NLDEV_CMD_FRMR_POOLS_GET] = { 3038 .dump = nldev_frmr_pools_get_dumpit, 3039 }, 3040 [RDMA_NLDEV_CMD_FRMR_POOLS_SET] = { 3041 .doit = nldev_frmr_pools_set_doit, 3042 .flags = RDMA_NL_ADMIN_PERM, 3043 }, 3044 }; 3045 3046 static int fill_mon_netdev_rename(struct sk_buff *msg, 3047 struct ib_device *device, u32 port, 3048 const struct net *net) 3049 { 3050 struct net_device *netdev = ib_device_get_netdev(device, port); 3051 int ret = 0; 3052 3053 if (!netdev || !net_eq(dev_net(netdev), net)) 3054 goto out; 3055 3056 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3057 if (ret) 3058 goto out; 3059 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3060 out: 3061 dev_put(netdev); 3062 return ret; 3063 } 3064 3065 static int fill_mon_netdev_association(struct sk_buff *msg, 3066 struct ib_device *device, u32 port, 3067 const struct net *net) 3068 { 3069 struct net_device *netdev = ib_device_get_netdev(device, port); 3070 int ret = 0; 3071 3072 if (netdev && !net_eq(dev_net(netdev), net)) 3073 goto out; 3074 3075 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); 3076 if (ret) 3077 goto out; 3078 3079 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 3080 dev_name(&device->dev)); 3081 if (ret) 3082 goto out; 3083 3084 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); 3085 if (ret) 3086 goto out; 3087 3088 if (netdev) { 3089 ret = nla_put_u32(msg, 3090 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3091 if (ret) 3092 goto out; 3093 3094 ret = nla_put_string(msg, 3095 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3096 } 3097 3098 out: 3099 dev_put(netdev); 3100 return ret; 3101 } 3102 3103 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, 3104 enum rdma_nl_notify_event_type type) 3105 { 3106 struct net_device *netdev; 3107 3108 switch (type) { 3109 case RDMA_REGISTER_EVENT: 3110 dev_warn_ratelimited(&device->dev, 3111 "Failed to send RDMA monitor register device event\n"); 3112 break; 3113 case RDMA_UNREGISTER_EVENT: 3114 dev_warn_ratelimited(&device->dev, 3115 "Failed to send RDMA monitor unregister device event\n"); 3116 break; 3117 case RDMA_NETDEV_ATTACH_EVENT: 3118 netdev = ib_device_get_netdev(device, port_num); 3119 dev_warn_ratelimited(&device->dev, 3120 "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", 3121 port_num, netdev->ifindex); 3122 dev_put(netdev); 3123 break; 3124 case RDMA_NETDEV_DETACH_EVENT: 3125 dev_warn_ratelimited(&device->dev, 3126 "Failed to send RDMA monitor netdev detach event: port %d\n", 3127 port_num); 3128 break; 3129 case RDMA_RENAME_EVENT: 3130 dev_warn_ratelimited(&device->dev, 3131 "Failed to send RDMA monitor rename device event\n"); 3132 break; 3133 3134 case RDMA_NETDEV_RENAME_EVENT: 3135 netdev = ib_device_get_netdev(device, port_num); 3136 dev_warn_ratelimited(&device->dev, 3137 "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", 3138 port_num, netdev->ifindex); 3139 dev_put(netdev); 3140 break; 3141 default: 3142 break; 3143 } 3144 } 3145 3146 int rdma_nl_notify_event(struct ib_device *device, u32 port_num, 3147 enum rdma_nl_notify_event_type type) 3148 { 3149 struct sk_buff *skb; 3150 int ret = -EMSGSIZE; 3151 struct net *net; 3152 void *nlh; 3153 3154 net = read_pnet(&device->coredev.rdma_net); 3155 if (!net) 3156 return -EINVAL; 3157 3158 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3159 if (!skb) 3160 return -ENOMEM; 3161 nlh = nlmsg_put(skb, 0, 0, 3162 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), 3163 0, 0); 3164 if (!nlh) 3165 goto err_free; 3166 3167 switch (type) { 3168 case RDMA_REGISTER_EVENT: 3169 case RDMA_UNREGISTER_EVENT: 3170 case RDMA_RENAME_EVENT: 3171 ret = fill_nldev_handle(skb, device); 3172 if (ret) 3173 goto err_free; 3174 break; 3175 case RDMA_NETDEV_ATTACH_EVENT: 3176 case RDMA_NETDEV_DETACH_EVENT: 3177 ret = fill_mon_netdev_association(skb, device, port_num, net); 3178 if (ret) 3179 goto err_free; 3180 break; 3181 case RDMA_NETDEV_RENAME_EVENT: 3182 ret = fill_mon_netdev_rename(skb, device, port_num, net); 3183 if (ret) 3184 goto err_free; 3185 break; 3186 default: 3187 break; 3188 } 3189 3190 ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); 3191 if (ret) 3192 goto err_free; 3193 3194 nlmsg_end(skb, nlh); 3195 ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); 3196 if (ret && ret != -ESRCH) { 3197 skb = NULL; /* skb is freed in the netlink send-op handling */ 3198 goto err_free; 3199 } 3200 return 0; 3201 3202 err_free: 3203 rdma_nl_notify_err_msg(device, port_num, type); 3204 nlmsg_free(skb); 3205 return ret; 3206 } 3207 3208 void __init nldev_init(void) 3209 { 3210 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); 3211 } 3212 3213 void nldev_exit(void) 3214 { 3215 rdma_nl_unregister(RDMA_NL_NLDEV); 3216 } 3217 3218 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5); 3219