1 /* 2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Neither the names of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * Alternatively, this software may be distributed under the terms of the 17 * GNU General Public License ("GPL") version 2 as published by the Free 18 * Software Foundation. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include <rdma/frmr_pools.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
#include "uverbs.h"
#include "frmr_pools.h"

/*
 * Controls whether a non-privileged user may specify a controlled QKEY:
 * when true, a non-privileged user is allowed to do so; when false, it is
 * not. Exposed to other code through rdma_nl_get_privileged_qkey().
 */
static bool privileged_qkey;
/* Presumably serializes the dellink path - confirm against its users. */
static DEFINE_MUTEX(nldev_dellink_mutex);

/*
 * Callback type with the signature shared by the fill_res_*_entry() helpers
 * in this file: (msg, has_cap_net_admin, res, port), returning 0 or a
 * negative errno.
 */
typedef int (*res_fill_func_t)(struct sk_buff*, bool,
			       struct rdma_restrack_entry*, uint32_t);

/*
 * Attribute policy passed to the netlink message parsers in this file,
 * indexed by attribute id.
 *
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32] = {
.type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
	/* No .type here: only .len is given, sized to a sockaddr storage */
	[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
	/* Binary blob; no .len bound is given in this policy */
	[RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
	[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
	/* No .type here: only .len is given, sized to a sockaddr storage */
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES]
= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
	/* NUL-terminated string; no .len bound is given in this policy */
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
	/* NUL-terminated string; no .len bound is given in this policy */
	[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_FRMR_POOLS] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE] = { .type =
NLA_U64 }, 188 [RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 }, 189 [RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 }, 190 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 }, 191 }; 192 193 static int put_driver_name_print_type(struct sk_buff *msg, const char *name, 194 enum rdma_nldev_print_type print_type) 195 { 196 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name)) 197 return -EMSGSIZE; 198 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC && 199 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type)) 200 return -EMSGSIZE; 201 202 return 0; 203 } 204 205 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, 206 enum rdma_nldev_print_type print_type, 207 u32 value) 208 { 209 if (put_driver_name_print_type(msg, name, print_type)) 210 return -EMSGSIZE; 211 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value)) 212 return -EMSGSIZE; 213 214 return 0; 215 } 216 217 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, 218 enum rdma_nldev_print_type print_type, 219 u64 value) 220 { 221 if (put_driver_name_print_type(msg, name, print_type)) 222 return -EMSGSIZE; 223 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value, 224 RDMA_NLDEV_ATTR_PAD)) 225 return -EMSGSIZE; 226 227 return 0; 228 } 229 230 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, 231 const char *str) 232 { 233 if (put_driver_name_print_type(msg, name, 234 RDMA_NLDEV_PRINT_TYPE_UNSPEC)) 235 return -EMSGSIZE; 236 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) 237 return -EMSGSIZE; 238 239 return 0; 240 } 241 EXPORT_SYMBOL(rdma_nl_put_driver_string); 242 243 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) 244 { 245 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 246 value); 247 } 248 EXPORT_SYMBOL(rdma_nl_put_driver_u32); 249 250 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, 251 
u32 value) 252 { 253 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 254 value); 255 } 256 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex); 257 258 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value) 259 { 260 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 261 value); 262 } 263 EXPORT_SYMBOL(rdma_nl_put_driver_u64); 264 265 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value) 266 { 267 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 268 value); 269 } 270 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex); 271 272 bool rdma_nl_get_privileged_qkey(void) 273 { 274 return privileged_qkey; 275 } 276 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey); 277 278 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) 279 { 280 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 281 return -EMSGSIZE; 282 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 283 dev_name(&device->dev))) 284 return -EMSGSIZE; 285 286 return 0; 287 } 288 289 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) 290 { 291 char fw[IB_FW_VERSION_NAME_MAX]; 292 int ret = 0; 293 u32 port; 294 295 if (fill_nldev_handle(msg, device)) 296 return -EMSGSIZE; 297 298 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) 299 return -EMSGSIZE; 300 301 BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); 302 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 303 device->attrs.device_cap_flags, 304 RDMA_NLDEV_ATTR_PAD)) 305 return -EMSGSIZE; 306 307 ib_get_device_fw_str(device, fw); 308 /* Device without FW has strlen(fw) = 0 */ 309 if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) 310 return -EMSGSIZE; 311 312 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, 313 be64_to_cpu(device->node_guid), 314 RDMA_NLDEV_ATTR_PAD)) 315 return -EMSGSIZE; 316 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, 
be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	/* DEV_TYPE is only reported when device->type is non-zero */
	if (device->type &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
		return -EMSGSIZE;

	/* PARENT_NAME is only reported for devices that have a parent */
	if (device->parent &&
	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
			   dev_name(&device->parent->dev)))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
		       device->name_assign_type))
		return -EMSGSIZE;

	/*
	 * Link type is determined on the first port. An mlx4 device, which
	 * can potentially have two different link types for the same IB
	 * device, is a case considered better avoided in the future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	/*
	 * ret is the raw nla_put_string() result here (not normalized to
	 * -EMSGSIZE) and keeps its initial 0 when no protocol matched.
	 */
	return ret;
}

/*
 * Fill @msg with the description of @port on @device: device handle, port
 * index, the attributes obtained from ib_query_port() and, when the port's
 * netdev belongs to @net, the netdev index and name.
 *
 * Returns 0 on success, the ib_query_port() error, or the error of a
 * failed attribute put.
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	/* The attributes in the following block are only emitted for IB ports */
	if (rdma_protocol_ib(device,
port)) { 378 BUILD_BUG_ON((sizeof(attr.port_cap_flags) + 379 sizeof(attr.port_cap_flags2)) > sizeof(u64)); 380 cap_flags = attr.port_cap_flags | 381 ((u64)attr.port_cap_flags2 << 32); 382 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 383 cap_flags, RDMA_NLDEV_ATTR_PAD)) 384 return -EMSGSIZE; 385 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, 386 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) 387 return -EMSGSIZE; 388 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) 389 return -EMSGSIZE; 390 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) 391 return -EMSGSIZE; 392 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) 393 return -EMSGSIZE; 394 } 395 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) 396 return -EMSGSIZE; 397 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) 398 return -EMSGSIZE; 399 400 netdev = ib_device_get_netdev(device, port); 401 if (netdev && net_eq(dev_net(netdev), net)) { 402 ret = nla_put_u32(msg, 403 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 404 if (ret) 405 goto out; 406 ret = nla_put_string(msg, 407 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 408 } 409 410 out: 411 dev_put(netdev); 412 return ret; 413 } 414 415 static int fill_res_info_entry(struct sk_buff *msg, 416 const char *name, u64 curr) 417 { 418 struct nlattr *entry_attr; 419 420 entry_attr = nla_nest_start_noflag(msg, 421 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); 422 if (!entry_attr) 423 return -EMSGSIZE; 424 425 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name)) 426 goto err; 427 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 428 RDMA_NLDEV_ATTR_PAD)) 429 goto err; 430 431 nla_nest_end(msg, entry_attr); 432 return 0; 433 434 err: 435 nla_nest_cancel(msg, entry_attr); 436 return -EMSGSIZE; 437 } 438 439 static int fill_res_info(struct sk_buff *msg, struct ib_device *device, 440 bool show_details) 441 { 442 static const char * const names[RDMA_RESTRACK_MAX] = { 443 
[RDMA_RESTRACK_PD] = "pd", 444 [RDMA_RESTRACK_CQ] = "cq", 445 [RDMA_RESTRACK_QP] = "qp", 446 [RDMA_RESTRACK_CM_ID] = "cm_id", 447 [RDMA_RESTRACK_MR] = "mr", 448 [RDMA_RESTRACK_CTX] = "ctx", 449 [RDMA_RESTRACK_SRQ] = "srq", 450 }; 451 452 struct nlattr *table_attr; 453 int ret, i, curr; 454 455 if (fill_nldev_handle(msg, device)) 456 return -EMSGSIZE; 457 458 table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); 459 if (!table_attr) 460 return -EMSGSIZE; 461 462 for (i = 0; i < RDMA_RESTRACK_MAX; i++) { 463 if (!names[i]) 464 continue; 465 curr = rdma_restrack_count(device, i, show_details); 466 ret = fill_res_info_entry(msg, names[i], curr); 467 if (ret) 468 goto err; 469 } 470 471 nla_nest_end(msg, table_attr); 472 return 0; 473 474 err: 475 nla_nest_cancel(msg, table_attr); 476 return ret; 477 } 478 479 static int fill_res_name_pid(struct sk_buff *msg, 480 struct rdma_restrack_entry *res) 481 { 482 int err = 0; 483 484 /* 485 * For user resources, user is should read /proc/PID/comm to get the 486 * name of the task file. 487 */ 488 if (rdma_is_kernel_res(res)) { 489 err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, 490 res->kern_name); 491 } else { 492 pid_t pid; 493 494 pid = task_pid_vnr(res->task); 495 /* 496 * Task is dead and in zombie state. 497 * There is no need to print PID anymore. 498 */ 499 if (pid) 500 /* 501 * This part is racy, task can be killed and PID will 502 * be zero right here but it is ok, next query won't 503 * return PID. We don't promise real-time reflection 504 * of SW objects. 505 */ 506 err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); 507 } 508 509 return err ? 
-EMSGSIZE : 0; 510 } 511 512 static int fill_res_qp_entry_query(struct sk_buff *msg, 513 struct rdma_restrack_entry *res, 514 struct ib_device *dev, 515 struct ib_qp *qp) 516 { 517 struct ib_qp_init_attr qp_init_attr; 518 struct ib_qp_attr qp_attr; 519 int ret; 520 521 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr); 522 if (ret) 523 return ret; 524 525 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { 526 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, 527 qp_attr.dest_qp_num)) 528 goto err; 529 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN, 530 qp_attr.rq_psn)) 531 goto err; 532 } 533 534 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn)) 535 goto err; 536 537 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC || 538 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) { 539 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, 540 qp_attr.path_mig_state)) 541 goto err; 542 } 543 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type)) 544 goto err; 545 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) 546 goto err; 547 548 if (dev->ops.fill_res_qp_entry) 549 return dev->ops.fill_res_qp_entry(msg, qp); 550 return 0; 551 552 err: return -EMSGSIZE; 553 } 554 555 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, 556 struct rdma_restrack_entry *res, uint32_t port) 557 { 558 struct ib_qp *qp = container_of(res, struct ib_qp, res); 559 struct ib_device *dev = qp->device; 560 int ret; 561 562 if (port && port != qp->port) 563 return -EAGAIN; 564 565 /* In create_qp() port is not set yet */ 566 if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) 567 return -EMSGSIZE; 568 569 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); 570 if (ret) 571 return -EMSGSIZE; 572 573 if (!rdma_is_kernel_res(res) && 574 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) 575 return -EMSGSIZE; 576 577 ret = fill_res_name_pid(msg, res); 578 if (ret) 579 return 
-EMSGSIZE; 580 581 return fill_res_qp_entry_query(msg, res, dev, qp); 582 } 583 584 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 585 struct rdma_restrack_entry *res, uint32_t port) 586 { 587 struct ib_qp *qp = container_of(res, struct ib_qp, res); 588 struct ib_device *dev = qp->device; 589 590 if (port && port != qp->port) 591 return -EAGAIN; 592 if (!dev->ops.fill_res_qp_entry_raw) 593 return -EINVAL; 594 return dev->ops.fill_res_qp_entry_raw(msg, qp); 595 } 596 597 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, 598 struct rdma_restrack_entry *res, uint32_t port) 599 { 600 struct rdma_id_private *id_priv = 601 container_of(res, struct rdma_id_private, res); 602 struct ib_device *dev = id_priv->id.device; 603 struct rdma_cm_id *cm_id = &id_priv->id; 604 605 if (port && port != cm_id->port_num) 606 return -EAGAIN; 607 608 if (cm_id->port_num && 609 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) 610 goto err; 611 612 if (id_priv->qp_num) { 613 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) 614 goto err; 615 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) 616 goto err; 617 } 618 619 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) 620 goto err; 621 622 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) 623 goto err; 624 625 if (cm_id->route.addr.src_addr.ss_family && 626 nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, 627 sizeof(cm_id->route.addr.src_addr), 628 &cm_id->route.addr.src_addr)) 629 goto err; 630 if (cm_id->route.addr.dst_addr.ss_family && 631 nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, 632 sizeof(cm_id->route.addr.dst_addr), 633 &cm_id->route.addr.dst_addr)) 634 goto err; 635 636 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id)) 637 goto err; 638 639 if (fill_res_name_pid(msg, res)) 640 goto err; 641 642 if (dev->ops.fill_res_cm_id_entry) 643 return dev->ops.fill_res_cm_id_entry(msg, cm_id); 644 return 0; 645 646 err: 
return -EMSGSIZE; 647 } 648 649 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, 650 struct rdma_restrack_entry *res, uint32_t port) 651 { 652 struct ib_cq *cq = container_of(res, struct ib_cq, res); 653 struct ib_device *dev = cq->device; 654 655 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) 656 return -EMSGSIZE; 657 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 658 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) 659 return -EMSGSIZE; 660 661 /* Poll context is only valid for kernel CQs */ 662 if (rdma_is_kernel_res(res) && 663 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) 664 return -EMSGSIZE; 665 666 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) 667 return -EMSGSIZE; 668 669 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) 670 return -EMSGSIZE; 671 if (!rdma_is_kernel_res(res) && 672 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 673 cq->uobject->uevent.uobject.context->res.id)) 674 return -EMSGSIZE; 675 676 if (fill_res_name_pid(msg, res)) 677 return -EMSGSIZE; 678 679 return (dev->ops.fill_res_cq_entry) ? 
680 dev->ops.fill_res_cq_entry(msg, cq) : 0; 681 } 682 683 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 684 struct rdma_restrack_entry *res, uint32_t port) 685 { 686 struct ib_cq *cq = container_of(res, struct ib_cq, res); 687 struct ib_device *dev = cq->device; 688 689 if (!dev->ops.fill_res_cq_entry_raw) 690 return -EINVAL; 691 return dev->ops.fill_res_cq_entry_raw(msg, cq); 692 } 693 694 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 695 struct rdma_restrack_entry *res, uint32_t port) 696 { 697 struct ib_mr *mr = container_of(res, struct ib_mr, res); 698 struct ib_device *dev = mr->pd->device; 699 700 if (has_cap_net_admin) { 701 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) 702 return -EMSGSIZE; 703 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) 704 return -EMSGSIZE; 705 } 706 707 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 708 RDMA_NLDEV_ATTR_PAD)) 709 return -EMSGSIZE; 710 711 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 712 return -EMSGSIZE; 713 714 if (!rdma_is_kernel_res(res) && 715 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) 716 return -EMSGSIZE; 717 718 if (fill_res_name_pid(msg, res)) 719 return -EMSGSIZE; 720 721 return (dev->ops.fill_res_mr_entry) ? 
722 dev->ops.fill_res_mr_entry(msg, mr) : 723 0; 724 } 725 726 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 727 struct rdma_restrack_entry *res, uint32_t port) 728 { 729 struct ib_mr *mr = container_of(res, struct ib_mr, res); 730 struct ib_device *dev = mr->pd->device; 731 732 if (!dev->ops.fill_res_mr_entry_raw) 733 return -EINVAL; 734 return dev->ops.fill_res_mr_entry_raw(msg, mr); 735 } 736 737 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, 738 struct rdma_restrack_entry *res, uint32_t port) 739 { 740 struct ib_pd *pd = container_of(res, struct ib_pd, res); 741 742 if (has_cap_net_admin) { 743 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, 744 pd->local_dma_lkey)) 745 goto err; 746 if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && 747 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, 748 pd->unsafe_global_rkey)) 749 goto err; 750 } 751 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 752 atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) 753 goto err; 754 755 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id)) 756 goto err; 757 758 if (!rdma_is_kernel_res(res) && 759 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 760 pd->uobject->context->res.id)) 761 goto err; 762 763 return fill_res_name_pid(msg, res); 764 765 err: return -EMSGSIZE; 766 } 767 768 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin, 769 struct rdma_restrack_entry *res, uint32_t port) 770 { 771 struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res); 772 773 if (rdma_is_kernel_res(res)) 774 return 0; 775 776 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id)) 777 return -EMSGSIZE; 778 779 return fill_res_name_pid(msg, res); 780 } 781 782 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range, 783 uint32_t max_range) 784 { 785 struct nlattr *entry_attr; 786 787 if (!min_range) 788 return 0; 789 790 entry_attr = nla_nest_start(msg, 
RDMA_NLDEV_ATTR_RES_QP_ENTRY); 791 if (!entry_attr) 792 return -EMSGSIZE; 793 794 if (min_range == max_range) { 795 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range)) 796 goto err; 797 } else { 798 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range)) 799 goto err; 800 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range)) 801 goto err; 802 } 803 nla_nest_end(msg, entry_attr); 804 return 0; 805 806 err: 807 nla_nest_cancel(msg, entry_attr); 808 return -EMSGSIZE; 809 } 810 811 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq) 812 { 813 uint32_t min_range = 0, prev = 0; 814 struct rdma_restrack_entry *res; 815 struct rdma_restrack_root *rt; 816 struct nlattr *table_attr; 817 struct ib_qp *qp = NULL; 818 unsigned long id = 0; 819 820 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 821 if (!table_attr) 822 return -EMSGSIZE; 823 824 rt = &srq->device->res[RDMA_RESTRACK_QP]; 825 xa_lock(&rt->xa); 826 xa_for_each(&rt->xa, id, res) { 827 if (!rdma_restrack_get(res)) 828 continue; 829 830 qp = container_of(res, struct ib_qp, res); 831 if (!qp->srq || (qp->srq->res.id != srq->res.id)) { 832 rdma_restrack_put(res); 833 continue; 834 } 835 836 if (qp->qp_num < prev) 837 /* qp_num should be ascending */ 838 goto err_loop; 839 840 if (min_range == 0) { 841 min_range = qp->qp_num; 842 } else if (qp->qp_num > (prev + 1)) { 843 if (fill_res_range_qp_entry(msg, min_range, prev)) 844 goto err_loop; 845 846 min_range = qp->qp_num; 847 } 848 prev = qp->qp_num; 849 rdma_restrack_put(res); 850 } 851 852 xa_unlock(&rt->xa); 853 854 if (fill_res_range_qp_entry(msg, min_range, prev)) 855 goto err; 856 857 nla_nest_end(msg, table_attr); 858 return 0; 859 860 err_loop: 861 rdma_restrack_put(res); 862 xa_unlock(&rt->xa); 863 err: 864 nla_nest_cancel(msg, table_attr); 865 return -EMSGSIZE; 866 } 867 868 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin, 869 struct rdma_restrack_entry *res, uint32_t port) 870 { 871 struct 
ib_srq *srq = container_of(res, struct ib_srq, res); 872 struct ib_device *dev = srq->device; 873 874 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id)) 875 goto err; 876 877 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type)) 878 goto err; 879 880 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id)) 881 goto err; 882 883 if (ib_srq_has_cq(srq->srq_type)) { 884 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, 885 srq->ext.cq->res.id)) 886 goto err; 887 } 888 889 if (fill_res_srq_qps(msg, srq)) 890 goto err; 891 892 if (fill_res_name_pid(msg, res)) 893 goto err; 894 895 if (dev->ops.fill_res_srq_entry) 896 return dev->ops.fill_res_srq_entry(msg, srq); 897 898 return 0; 899 900 err: 901 return -EMSGSIZE; 902 } 903 904 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 905 struct rdma_restrack_entry *res, uint32_t port) 906 { 907 struct ib_srq *srq = container_of(res, struct ib_srq, res); 908 struct ib_device *dev = srq->device; 909 910 if (!dev->ops.fill_res_srq_entry_raw) 911 return -EINVAL; 912 return dev->ops.fill_res_srq_entry_raw(msg, srq); 913 } 914 915 static int fill_stat_counter_mode(struct sk_buff *msg, 916 struct rdma_counter *counter) 917 { 918 struct rdma_counter_mode *m = &counter->mode; 919 920 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) 921 return -EMSGSIZE; 922 923 if (m->mode == RDMA_COUNTER_MODE_AUTO) { 924 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && 925 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) 926 return -EMSGSIZE; 927 928 if ((m->mask & RDMA_COUNTER_MASK_PID) && 929 fill_res_name_pid(msg, &counter->res)) 930 return -EMSGSIZE; 931 } 932 933 return 0; 934 } 935 936 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn) 937 { 938 struct nlattr *entry_attr; 939 940 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); 941 if (!entry_attr) 942 return -EMSGSIZE; 943 944 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) 945 goto err; 946 
947 nla_nest_end(msg, entry_attr); 948 return 0; 949 950 err: 951 nla_nest_cancel(msg, entry_attr); 952 return -EMSGSIZE; 953 } 954 955 static int fill_stat_counter_qps(struct sk_buff *msg, 956 struct rdma_counter *counter) 957 { 958 struct rdma_restrack_entry *res; 959 struct rdma_restrack_root *rt; 960 struct nlattr *table_attr; 961 struct ib_qp *qp = NULL; 962 unsigned long id = 0; 963 int ret = 0; 964 965 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 966 if (!table_attr) 967 return -EMSGSIZE; 968 969 rt = &counter->device->res[RDMA_RESTRACK_QP]; 970 xa_lock(&rt->xa); 971 xa_for_each(&rt->xa, id, res) { 972 qp = container_of(res, struct ib_qp, res); 973 if (!qp->counter || (qp->counter->id != counter->id)) 974 continue; 975 976 ret = fill_stat_counter_qp_entry(msg, qp->qp_num); 977 if (ret) 978 goto err; 979 } 980 981 xa_unlock(&rt->xa); 982 nla_nest_end(msg, table_attr); 983 return 0; 984 985 err: 986 xa_unlock(&rt->xa); 987 nla_nest_cancel(msg, table_attr); 988 return ret; 989 } 990 991 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, 992 u64 value) 993 { 994 struct nlattr *entry_attr; 995 996 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 997 if (!entry_attr) 998 return -EMSGSIZE; 999 1000 if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 1001 name)) 1002 goto err; 1003 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, 1004 value, RDMA_NLDEV_ATTR_PAD)) 1005 goto err; 1006 1007 nla_nest_end(msg, entry_attr); 1008 return 0; 1009 1010 err: 1011 nla_nest_cancel(msg, entry_attr); 1012 return -EMSGSIZE; 1013 } 1014 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); 1015 1016 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 1017 struct rdma_restrack_entry *res, uint32_t port) 1018 { 1019 struct ib_mr *mr = container_of(res, struct ib_mr, res); 1020 struct ib_device *dev = mr->pd->device; 1021 1022 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, 
res->id)) 1023 goto err; 1024 1025 if (dev->ops.fill_stat_mr_entry) 1026 return dev->ops.fill_stat_mr_entry(msg, mr); 1027 return 0; 1028 1029 err: 1030 return -EMSGSIZE; 1031 } 1032 1033 static int fill_stat_counter_hwcounters(struct sk_buff *msg, 1034 struct rdma_counter *counter) 1035 { 1036 struct rdma_hw_stats *st = counter->stats; 1037 struct nlattr *table_attr; 1038 int i; 1039 1040 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 1041 if (!table_attr) 1042 return -EMSGSIZE; 1043 1044 mutex_lock(&st->lock); 1045 for (i = 0; i < st->num_counters; i++) { 1046 if (test_bit(i, st->is_disabled)) 1047 continue; 1048 if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, 1049 st->value[i])) 1050 goto err; 1051 } 1052 mutex_unlock(&st->lock); 1053 1054 nla_nest_end(msg, table_attr); 1055 return 0; 1056 1057 err: 1058 mutex_unlock(&st->lock); 1059 nla_nest_cancel(msg, table_attr); 1060 return -EMSGSIZE; 1061 } 1062 1063 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, 1064 struct rdma_restrack_entry *res, 1065 uint32_t port) 1066 { 1067 struct rdma_counter *counter = 1068 container_of(res, struct rdma_counter, res); 1069 1070 if (port && port != counter->port) 1071 return -EAGAIN; 1072 1073 /* Dump it even query failed */ 1074 rdma_counter_query_stats(counter); 1075 1076 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || 1077 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || 1078 fill_stat_counter_mode(msg, counter) || 1079 fill_stat_counter_qps(msg, counter) || 1080 fill_stat_counter_hwcounters(msg, counter)) 1081 return -EMSGSIZE; 1082 1083 return 0; 1084 } 1085 1086 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1087 struct netlink_ext_ack *extack) 1088 { 1089 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1090 struct ib_device *device; 1091 struct sk_buff *msg; 1092 u32 index; 1093 int err; 1094 1095 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1096 
nldev_policy, NL_VALIDATE_LIBERAL, extack); 1097 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1098 return -EINVAL; 1099 1100 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1101 1102 device = ib_device_get_by_index(sock_net(skb->sk), index); 1103 if (!device) 1104 return -EINVAL; 1105 1106 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1107 if (!msg) { 1108 err = -ENOMEM; 1109 goto err; 1110 } 1111 1112 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1113 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1114 0, 0); 1115 if (!nlh) { 1116 err = -EMSGSIZE; 1117 goto err_free; 1118 } 1119 1120 err = fill_dev_info(msg, device); 1121 if (err) 1122 goto err_free; 1123 1124 nlmsg_end(msg, nlh); 1125 1126 ib_device_put(device); 1127 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1128 1129 err_free: 1130 nlmsg_free(msg); 1131 err: 1132 ib_device_put(device); 1133 return err; 1134 } 1135 1136 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1137 struct netlink_ext_ack *extack) 1138 { 1139 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1140 struct ib_device *device; 1141 u32 index; 1142 int err; 1143 1144 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1145 nldev_policy, extack); 1146 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1147 return -EINVAL; 1148 1149 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1150 device = ib_device_get_by_index(sock_net(skb->sk), index); 1151 if (!device) 1152 return -EINVAL; 1153 1154 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { 1155 char name[IB_DEVICE_NAME_MAX] = {}; 1156 1157 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1158 IB_DEVICE_NAME_MAX); 1159 if (strlen(name) == 0) { 1160 err = -EINVAL; 1161 goto done; 1162 } 1163 err = ib_device_rename(device, name); 1164 goto done; 1165 } 1166 1167 if (tb[RDMA_NLDEV_NET_NS_FD]) { 1168 u32 ns_fd; 1169 1170 ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); 1171 err = ib_device_set_netns_put(skb, device, ns_fd); 1172 goto put_done; 1173 } 1174 
1175 if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) { 1176 u8 use_dim; 1177 1178 use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); 1179 err = ib_device_set_dim(device, use_dim); 1180 goto done; 1181 } 1182 1183 done: 1184 ib_device_put(device); 1185 put_done: 1186 return err; 1187 } 1188 1189 static int _nldev_get_dumpit(struct ib_device *device, 1190 struct sk_buff *skb, 1191 struct netlink_callback *cb, 1192 unsigned int idx) 1193 { 1194 int start = cb->args[0]; 1195 struct nlmsghdr *nlh; 1196 1197 if (idx < start) 1198 return 0; 1199 1200 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1201 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1202 0, NLM_F_MULTI); 1203 1204 if (!nlh || fill_dev_info(skb, device)) { 1205 nlmsg_cancel(skb, nlh); 1206 goto out; 1207 } 1208 1209 nlmsg_end(skb, nlh); 1210 1211 idx++; 1212 1213 out: cb->args[0] = idx; 1214 return skb->len; 1215 } 1216 1217 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 1218 { 1219 /* 1220 * There is no need to take lock, because 1221 * we are relying on ib_core's locking. 
1222 */ 1223 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); 1224 } 1225 1226 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1227 struct netlink_ext_ack *extack) 1228 { 1229 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1230 struct ib_device *device; 1231 struct sk_buff *msg; 1232 u32 index; 1233 u32 port; 1234 int err; 1235 1236 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1237 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1238 if (err || 1239 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 1240 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 1241 return -EINVAL; 1242 1243 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1244 device = ib_device_get_by_index(sock_net(skb->sk), index); 1245 if (!device) 1246 return -EINVAL; 1247 1248 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1249 if (!rdma_is_port_valid(device, port)) { 1250 err = -EINVAL; 1251 goto err; 1252 } 1253 1254 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1255 if (!msg) { 1256 err = -ENOMEM; 1257 goto err; 1258 } 1259 1260 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1261 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1262 0, 0); 1263 if (!nlh) { 1264 err = -EMSGSIZE; 1265 goto err_free; 1266 } 1267 1268 err = fill_port_info(msg, device, port, sock_net(skb->sk)); 1269 if (err) 1270 goto err_free; 1271 1272 nlmsg_end(msg, nlh); 1273 ib_device_put(device); 1274 1275 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1276 1277 err_free: 1278 nlmsg_free(msg); 1279 err: 1280 ib_device_put(device); 1281 return err; 1282 } 1283 1284 static int nldev_port_get_dumpit(struct sk_buff *skb, 1285 struct netlink_callback *cb) 1286 { 1287 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1288 struct ib_device *device; 1289 int start = cb->args[0]; 1290 struct nlmsghdr *nlh; 1291 u32 idx = 0; 1292 u32 ifindex; 1293 int err; 1294 unsigned int p; 1295 1296 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1297 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1298 
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1299 return -EINVAL; 1300 1301 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1302 device = ib_device_get_by_index(sock_net(skb->sk), ifindex); 1303 if (!device) 1304 return -EINVAL; 1305 1306 rdma_for_each_port (device, p) { 1307 /* 1308 * The dumpit function returns all information from specific 1309 * index. This specific index is taken from the netlink 1310 * messages request sent by user and it is available 1311 * in cb->args[0]. 1312 * 1313 * Usually, the user doesn't fill this field and it causes 1314 * to return everything. 1315 * 1316 */ 1317 if (idx < start) { 1318 idx++; 1319 continue; 1320 } 1321 1322 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 1323 cb->nlh->nlmsg_seq, 1324 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1325 RDMA_NLDEV_CMD_PORT_GET), 1326 0, NLM_F_MULTI); 1327 1328 if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { 1329 nlmsg_cancel(skb, nlh); 1330 goto out; 1331 } 1332 idx++; 1333 nlmsg_end(skb, nlh); 1334 } 1335 1336 out: 1337 ib_device_put(device); 1338 cb->args[0] = idx; 1339 return skb->len; 1340 } 1341 1342 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1343 struct netlink_ext_ack *extack) 1344 { 1345 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1346 bool show_details = false; 1347 struct ib_device *device; 1348 struct sk_buff *msg; 1349 u32 index; 1350 int ret; 1351 1352 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1353 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1354 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1355 return -EINVAL; 1356 1357 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1358 device = ib_device_get_by_index(sock_net(skb->sk), index); 1359 if (!device) 1360 return -EINVAL; 1361 1362 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1363 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1364 1365 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1366 if (!msg) { 1367 ret = -ENOMEM; 1368 goto err; 1369 } 1370 1371 nlh = 
nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1372 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1373 0, 0); 1374 if (!nlh) { 1375 ret = -EMSGSIZE; 1376 goto err_free; 1377 } 1378 1379 ret = fill_res_info(msg, device, show_details); 1380 if (ret) 1381 goto err_free; 1382 1383 nlmsg_end(msg, nlh); 1384 ib_device_put(device); 1385 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1386 1387 err_free: 1388 nlmsg_free(msg); 1389 err: 1390 ib_device_put(device); 1391 return ret; 1392 } 1393 1394 static int _nldev_res_get_dumpit(struct ib_device *device, 1395 struct sk_buff *skb, 1396 struct netlink_callback *cb, 1397 unsigned int idx) 1398 { 1399 int start = cb->args[0]; 1400 struct nlmsghdr *nlh; 1401 1402 if (idx < start) 1403 return 0; 1404 1405 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1406 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1407 0, NLM_F_MULTI); 1408 1409 if (!nlh || fill_res_info(skb, device, false)) { 1410 nlmsg_cancel(skb, nlh); 1411 goto out; 1412 } 1413 nlmsg_end(skb, nlh); 1414 1415 idx++; 1416 1417 out: 1418 cb->args[0] = idx; 1419 return skb->len; 1420 } 1421 1422 static int nldev_res_get_dumpit(struct sk_buff *skb, 1423 struct netlink_callback *cb) 1424 { 1425 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); 1426 } 1427 1428 struct nldev_fill_res_entry { 1429 enum rdma_nldev_attr nldev_attr; 1430 u8 flags; 1431 u32 entry; 1432 u32 id; 1433 }; 1434 1435 enum nldev_res_flags { 1436 NLDEV_PER_DEV = 1 << 0, 1437 }; 1438 1439 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { 1440 [RDMA_RESTRACK_QP] = { 1441 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, 1442 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, 1443 .id = RDMA_NLDEV_ATTR_RES_LQPN, 1444 }, 1445 [RDMA_RESTRACK_CM_ID] = { 1446 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, 1447 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, 1448 .id = RDMA_NLDEV_ATTR_RES_CM_IDN, 1449 }, 1450 [RDMA_RESTRACK_CQ] = { 1451 
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, 1452 .flags = NLDEV_PER_DEV, 1453 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, 1454 .id = RDMA_NLDEV_ATTR_RES_CQN, 1455 }, 1456 [RDMA_RESTRACK_MR] = { 1457 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, 1458 .flags = NLDEV_PER_DEV, 1459 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, 1460 .id = RDMA_NLDEV_ATTR_RES_MRN, 1461 }, 1462 [RDMA_RESTRACK_PD] = { 1463 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, 1464 .flags = NLDEV_PER_DEV, 1465 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, 1466 .id = RDMA_NLDEV_ATTR_RES_PDN, 1467 }, 1468 [RDMA_RESTRACK_COUNTER] = { 1469 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, 1470 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, 1471 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, 1472 }, 1473 [RDMA_RESTRACK_CTX] = { 1474 .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX, 1475 .flags = NLDEV_PER_DEV, 1476 .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY, 1477 .id = RDMA_NLDEV_ATTR_RES_CTXN, 1478 }, 1479 [RDMA_RESTRACK_SRQ] = { 1480 .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ, 1481 .flags = NLDEV_PER_DEV, 1482 .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, 1483 .id = RDMA_NLDEV_ATTR_RES_SRQN, 1484 }, 1485 1486 }; 1487 1488 static noinline_for_stack int 1489 res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1490 struct netlink_ext_ack *extack, 1491 enum rdma_restrack_type res_type, 1492 res_fill_func_t fill_func) 1493 { 1494 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1495 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1496 struct rdma_restrack_entry *res; 1497 struct ib_device *device; 1498 u32 index, id, port = 0; 1499 bool has_cap_net_admin; 1500 struct sk_buff *msg; 1501 int ret; 1502 1503 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1504 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1505 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) 1506 return -EINVAL; 1507 1508 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1509 device = ib_device_get_by_index(sock_net(skb->sk), index); 1510 if (!device) 1511 return -EINVAL; 1512 
1513 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1514 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1515 if (!rdma_is_port_valid(device, port)) { 1516 ret = -EINVAL; 1517 goto err; 1518 } 1519 } 1520 1521 if ((port && fe->flags & NLDEV_PER_DEV) || 1522 (!port && ~fe->flags & NLDEV_PER_DEV)) { 1523 ret = -EINVAL; 1524 goto err; 1525 } 1526 1527 id = nla_get_u32(tb[fe->id]); 1528 res = rdma_restrack_get_byid(device, res_type, id); 1529 if (IS_ERR(res)) { 1530 ret = PTR_ERR(res); 1531 goto err; 1532 } 1533 1534 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1535 if (!msg) { 1536 ret = -ENOMEM; 1537 goto err_get; 1538 } 1539 1540 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1541 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1542 RDMA_NL_GET_OP(nlh->nlmsg_type)), 1543 0, 0); 1544 1545 if (!nlh || fill_nldev_handle(msg, device)) { 1546 ret = -EMSGSIZE; 1547 goto err_free; 1548 } 1549 1550 has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); 1551 1552 ret = fill_func(msg, has_cap_net_admin, res, port); 1553 if (ret) 1554 goto err_free; 1555 1556 rdma_restrack_put(res); 1557 nlmsg_end(msg, nlh); 1558 ib_device_put(device); 1559 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1560 1561 err_free: 1562 nlmsg_free(msg); 1563 err_get: 1564 rdma_restrack_put(res); 1565 err: 1566 ib_device_put(device); 1567 return ret; 1568 } 1569 1570 static int res_get_common_dumpit(struct sk_buff *skb, 1571 struct netlink_callback *cb, 1572 enum rdma_restrack_type res_type, 1573 res_fill_func_t fill_func) 1574 { 1575 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1576 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1577 struct rdma_restrack_entry *res; 1578 struct rdma_restrack_root *rt; 1579 int err, ret = 0, idx = 0; 1580 bool show_details = false; 1581 struct nlattr *table_attr; 1582 struct nlattr *entry_attr; 1583 struct ib_device *device; 1584 int start = cb->args[0]; 1585 bool has_cap_net_admin; 1586 struct nlmsghdr *nlh; 1587 unsigned long id; 
1588 u32 index, port = 0; 1589 bool filled = false; 1590 1591 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1592 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1593 /* 1594 * Right now, we are expecting the device index to get res information, 1595 * but it is possible to extend this code to return all devices in 1596 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. 1597 * if it doesn't exist, we will iterate over all devices. 1598 * 1599 * But it is not needed for now. 1600 */ 1601 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1602 return -EINVAL; 1603 1604 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1605 device = ib_device_get_by_index(sock_net(skb->sk), index); 1606 if (!device) 1607 return -EINVAL; 1608 1609 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1610 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1611 1612 /* 1613 * If no PORT_INDEX is supplied, we will return all QPs from that device 1614 */ 1615 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1616 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1617 if (!rdma_is_port_valid(device, port)) { 1618 ret = -EINVAL; 1619 goto err_index; 1620 } 1621 } 1622 1623 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1624 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1625 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 1626 0, NLM_F_MULTI); 1627 1628 if (!nlh || fill_nldev_handle(skb, device)) { 1629 ret = -EMSGSIZE; 1630 goto err; 1631 } 1632 1633 table_attr = nla_nest_start_noflag(skb, fe->nldev_attr); 1634 if (!table_attr) { 1635 ret = -EMSGSIZE; 1636 goto err; 1637 } 1638 1639 has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); 1640 1641 rt = &device->res[res_type]; 1642 xa_lock(&rt->xa); 1643 /* 1644 * FIXME: if the skip ahead is something common this loop should 1645 * use xas_for_each & xas_pause to optimize, we can have a lot of 1646 * objects. 
1647 */ 1648 xa_for_each(&rt->xa, id, res) { 1649 if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) 1650 goto next; 1651 1652 if (idx < start || !rdma_restrack_get(res)) 1653 goto next; 1654 1655 xa_unlock(&rt->xa); 1656 1657 filled = true; 1658 1659 entry_attr = nla_nest_start_noflag(skb, fe->entry); 1660 if (!entry_attr) { 1661 ret = -EMSGSIZE; 1662 rdma_restrack_put(res); 1663 goto msg_full; 1664 } 1665 1666 ret = fill_func(skb, has_cap_net_admin, res, port); 1667 1668 rdma_restrack_put(res); 1669 1670 if (ret) { 1671 nla_nest_cancel(skb, entry_attr); 1672 if (ret == -EMSGSIZE) 1673 goto msg_full; 1674 if (ret == -EAGAIN) 1675 goto again; 1676 goto res_err; 1677 } 1678 nla_nest_end(skb, entry_attr); 1679 again: xa_lock(&rt->xa); 1680 next: idx++; 1681 } 1682 xa_unlock(&rt->xa); 1683 1684 msg_full: 1685 nla_nest_end(skb, table_attr); 1686 nlmsg_end(skb, nlh); 1687 cb->args[0] = idx; 1688 1689 /* 1690 * No more entries to fill, cancel the message and 1691 * return 0 to mark end of dumpit. 
1692 */ 1693 if (!filled) 1694 goto err; 1695 1696 ib_device_put(device); 1697 return skb->len; 1698 1699 res_err: 1700 nla_nest_cancel(skb, table_attr); 1701 1702 err: 1703 nlmsg_cancel(skb, nlh); 1704 1705 err_index: 1706 ib_device_put(device); 1707 return ret; 1708 } 1709 1710 #define RES_GET_FUNCS(name, type) \ 1711 static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ 1712 struct netlink_callback *cb) \ 1713 { \ 1714 return res_get_common_dumpit(skb, cb, type, \ 1715 fill_res_##name##_entry); \ 1716 } \ 1717 static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ 1718 struct nlmsghdr *nlh, \ 1719 struct netlink_ext_ack *extack) \ 1720 { \ 1721 return res_get_common_doit(skb, nlh, extack, type, \ 1722 fill_res_##name##_entry); \ 1723 } 1724 1725 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); 1726 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); 1727 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); 1728 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); 1729 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); 1730 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); 1731 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); 1732 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); 1733 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); 1734 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX); 1735 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ); 1736 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ); 1737 1738 static LIST_HEAD(link_ops); 1739 static DECLARE_RWSEM(link_ops_rwsem); 1740 1741 static const struct rdma_link_ops *link_ops_get(const char *type) 1742 { 1743 const struct rdma_link_ops *ops; 1744 1745 list_for_each_entry(ops, &link_ops, list) { 1746 if (!strcmp(ops->type, type)) 1747 goto out; 1748 } 1749 ops = NULL; 1750 out: 1751 return ops; 1752 } 1753 1754 void rdma_link_register(struct rdma_link_ops *ops) 1755 { 1756 down_write(&link_ops_rwsem); 1757 if (WARN_ON_ONCE(link_ops_get(ops->type))) 1758 goto out; 1759 list_add(&ops->list, &link_ops); 1760 out: 1761 up_write(&link_ops_rwsem); 1762 } 1763 EXPORT_SYMBOL(rdma_link_register); 1764 1765 void 
rdma_link_unregister(struct rdma_link_ops *ops) 1766 { 1767 down_write(&link_ops_rwsem); 1768 list_del(&ops->list); 1769 up_write(&link_ops_rwsem); 1770 } 1771 EXPORT_SYMBOL(rdma_link_unregister); 1772 1773 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, 1774 struct netlink_ext_ack *extack) 1775 { 1776 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1777 char ibdev_name[IB_DEVICE_NAME_MAX]; 1778 const struct rdma_link_ops *ops; 1779 char ndev_name[IFNAMSIZ]; 1780 struct net_device *ndev; 1781 char type[IFNAMSIZ]; 1782 int err; 1783 1784 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1785 nldev_policy, extack); 1786 if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || 1787 !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) 1788 return -EINVAL; 1789 1790 nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1791 sizeof(ibdev_name)); 1792 if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) 1793 return -EINVAL; 1794 1795 nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); 1796 nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], 1797 sizeof(ndev_name)); 1798 1799 ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); 1800 if (!ndev) 1801 return -ENODEV; 1802 1803 down_read(&link_ops_rwsem); 1804 ops = link_ops_get(type); 1805 #ifdef CONFIG_MODULES 1806 if (!ops) { 1807 up_read(&link_ops_rwsem); 1808 request_module("rdma-link-%s", type); 1809 down_read(&link_ops_rwsem); 1810 ops = link_ops_get(type); 1811 } 1812 #endif 1813 err = ops ? 
ops->newlink(ibdev_name, ndev) : -EINVAL; 1814 up_read(&link_ops_rwsem); 1815 dev_put(ndev); 1816 1817 return err; 1818 } 1819 1820 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 1821 struct netlink_ext_ack *extack) 1822 { 1823 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1824 struct ib_device *device; 1825 u32 index; 1826 int err; 1827 1828 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1829 nldev_policy, extack); 1830 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1831 return -EINVAL; 1832 1833 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1834 device = ib_device_get_by_index(sock_net(skb->sk), index); 1835 if (!device) 1836 return -EINVAL; 1837 1838 if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { 1839 ib_device_put(device); 1840 return -EINVAL; 1841 } 1842 1843 /* 1844 * This path is triggered by the 'rdma link delete' administrative command. 1845 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here. 1846 * Note: iWARP driver does not implement .dellink, so this logic is 1847 * implicitly scoped to the driver supporting dynamic link deletion like RXE. 
1848 */ 1849 if (device->link_ops && device->link_ops->dellink) { 1850 mutex_lock(&nldev_dellink_mutex); 1851 err = device->link_ops->dellink(device); 1852 mutex_unlock(&nldev_dellink_mutex); 1853 if (err) 1854 return err; 1855 } 1856 1857 ib_unregister_device_and_put(device); 1858 return 0; 1859 } 1860 1861 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, 1862 struct netlink_ext_ack *extack) 1863 { 1864 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1865 char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; 1866 struct ib_client_nl_info data = {}; 1867 struct ib_device *ibdev = NULL; 1868 struct sk_buff *msg; 1869 u32 index; 1870 int err; 1871 1872 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 1873 NL_VALIDATE_LIBERAL, extack); 1874 if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) 1875 return -EINVAL; 1876 1877 nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], 1878 sizeof(client_name)); 1879 1880 if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 1881 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1882 ibdev = ib_device_get_by_index(sock_net(skb->sk), index); 1883 if (!ibdev) 1884 return -EINVAL; 1885 1886 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1887 data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1888 if (!rdma_is_port_valid(ibdev, data.port)) { 1889 err = -EINVAL; 1890 goto out_put; 1891 } 1892 } else { 1893 data.port = -1; 1894 } 1895 } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1896 return -EINVAL; 1897 } 1898 1899 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1900 if (!msg) { 1901 err = -ENOMEM; 1902 goto out_put; 1903 } 1904 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1905 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1906 RDMA_NLDEV_CMD_GET_CHARDEV), 1907 0, 0); 1908 if (!nlh) { 1909 err = -EMSGSIZE; 1910 goto out_nlmsg; 1911 } 1912 1913 data.nl_msg = msg; 1914 err = ib_get_client_nl_info(ibdev, client_name, &data); 1915 if (err) 1916 goto out_nlmsg; 1917 1918 err = nla_put_u64_64bit(msg, 
RDMA_NLDEV_ATTR_CHARDEV, 1919 huge_encode_dev(data.cdev->devt), 1920 RDMA_NLDEV_ATTR_PAD); 1921 if (err) 1922 goto out_data; 1923 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, 1924 RDMA_NLDEV_ATTR_PAD); 1925 if (err) 1926 goto out_data; 1927 if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, 1928 dev_name(data.cdev))) { 1929 err = -EMSGSIZE; 1930 goto out_data; 1931 } 1932 1933 nlmsg_end(msg, nlh); 1934 put_device(data.cdev); 1935 if (ibdev) 1936 ib_device_put(ibdev); 1937 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1938 1939 out_data: 1940 put_device(data.cdev); 1941 out_nlmsg: 1942 nlmsg_free(msg); 1943 out_put: 1944 if (ibdev) 1945 ib_device_put(ibdev); 1946 return err; 1947 } 1948 1949 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1950 struct netlink_ext_ack *extack) 1951 { 1952 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1953 struct sk_buff *msg; 1954 int err; 1955 1956 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1957 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1958 if (err) 1959 return err; 1960 1961 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1962 if (!msg) 1963 return -ENOMEM; 1964 1965 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1966 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1967 RDMA_NLDEV_CMD_SYS_GET), 1968 0, 0); 1969 if (!nlh) { 1970 nlmsg_free(msg); 1971 return -EMSGSIZE; 1972 } 1973 1974 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, 1975 (u8)ib_devices_shared_netns); 1976 if (err) { 1977 nlmsg_free(msg); 1978 return err; 1979 } 1980 1981 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE, 1982 (u8)privileged_qkey); 1983 if (err) { 1984 nlmsg_free(msg); 1985 return err; 1986 } 1987 1988 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1); 1989 if (err) { 1990 nlmsg_free(msg); 1991 return err; 1992 } 1993 /* 1994 * Copy-on-fork is supported. 
1995 * See commits: 1996 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") 1997 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm") 1998 * for more details. Don't backport this without them. 1999 * 2000 * Return value ignored on purpose, assume copy-on-fork is not 2001 * supported in case of failure. 2002 */ 2003 nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1); 2004 2005 nlmsg_end(msg, nlh); 2006 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2007 } 2008 2009 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[]) 2010 { 2011 u8 enable; 2012 int err; 2013 2014 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); 2015 /* Only 0 and 1 are supported */ 2016 if (enable > 1) 2017 return -EINVAL; 2018 2019 err = rdma_compatdev_set(enable); 2020 return err; 2021 } 2022 2023 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[]) 2024 { 2025 u8 enable; 2026 2027 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]); 2028 /* Only 0 and 1 are supported */ 2029 if (enable > 1) 2030 return -EINVAL; 2031 2032 privileged_qkey = enable; 2033 return 0; 2034 } 2035 2036 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2037 struct netlink_ext_ack *extack) 2038 { 2039 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2040 int err; 2041 2042 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2043 nldev_policy, extack); 2044 if (err) 2045 return -EINVAL; 2046 2047 if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) 2048 return nldev_set_sys_set_netns_doit(tb); 2049 2050 if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]) 2051 return nldev_set_sys_set_pqkey_doit(tb); 2052 2053 return -EINVAL; 2054 } 2055 2056 2057 static int nldev_stat_set_mode_doit(struct sk_buff *msg, 2058 struct netlink_ext_ack *extack, 2059 struct nlattr *tb[], 2060 struct ib_device *device, u32 port) 2061 { 2062 u32 mode, mask = 0, qpn, cntn = 0; 2063 bool opcnt = false; 2064 int ret; 2065 2066 /* Currently 
only counter for QP is supported */ 2067 if (!tb[RDMA_NLDEV_ATTR_STAT_RES] || 2068 nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2069 return -EINVAL; 2070 2071 if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]) 2072 opcnt = !!nla_get_u8( 2073 tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]); 2074 2075 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); 2076 if (mode == RDMA_COUNTER_MODE_AUTO) { 2077 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) 2078 mask = nla_get_u32( 2079 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); 2080 return rdma_counter_set_auto_mode(device, port, mask, opcnt, 2081 extack); 2082 } 2083 2084 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2085 return -EINVAL; 2086 2087 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2088 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { 2089 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2090 ret = rdma_counter_bind_qpn(device, port, qpn, cntn); 2091 if (ret) 2092 return ret; 2093 } else { 2094 ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); 2095 if (ret) 2096 return ret; 2097 } 2098 2099 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2100 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2101 ret = -EMSGSIZE; 2102 goto err_fill; 2103 } 2104 2105 return 0; 2106 2107 err_fill: 2108 rdma_counter_unbind_qpn(device, port, qpn, cntn); 2109 return ret; 2110 } 2111 2112 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], 2113 struct ib_device *device, 2114 u32 port) 2115 { 2116 struct rdma_hw_stats *stats; 2117 struct nlattr *entry_attr; 2118 unsigned long *target; 2119 int rem, i, ret = 0; 2120 u32 index; 2121 2122 stats = ib_get_hw_stats_port(device, port); 2123 if (!stats) 2124 return -EINVAL; 2125 2126 target = kcalloc(BITS_TO_LONGS(stats->num_counters), 2127 sizeof(*stats->is_disabled), GFP_KERNEL); 2128 if (!target) 2129 return -ENOMEM; 2130 2131 nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], 2132 rem) { 2133 index = nla_get_u32(entry_attr); 
2134 if ((index >= stats->num_counters) || 2135 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { 2136 ret = -EINVAL; 2137 goto out; 2138 } 2139 2140 set_bit(index, target); 2141 } 2142 2143 for (i = 0; i < stats->num_counters; i++) { 2144 if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) 2145 continue; 2146 2147 ret = rdma_counter_modify(device, port, i, test_bit(i, target)); 2148 if (ret) 2149 goto out; 2150 } 2151 2152 out: 2153 kfree(target); 2154 return ret; 2155 } 2156 2157 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2158 struct netlink_ext_ack *extack) 2159 { 2160 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2161 struct ib_device *device; 2162 struct sk_buff *msg; 2163 u32 index, port; 2164 int ret; 2165 2166 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2167 extack); 2168 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2169 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2170 return -EINVAL; 2171 2172 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2173 device = ib_device_get_by_index(sock_net(skb->sk), index); 2174 if (!device) 2175 return -EINVAL; 2176 2177 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2178 if (!rdma_is_port_valid(device, port)) { 2179 ret = -EINVAL; 2180 goto err_put_device; 2181 } 2182 2183 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && 2184 !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2185 ret = -EINVAL; 2186 goto err_put_device; 2187 } 2188 2189 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2190 if (!msg) { 2191 ret = -ENOMEM; 2192 goto err_put_device; 2193 } 2194 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2195 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2196 RDMA_NLDEV_CMD_STAT_SET), 2197 0, 0); 2198 if (!nlh || fill_nldev_handle(msg, device) || 2199 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2200 ret = -EMSGSIZE; 2201 goto err_free_msg; 2202 } 2203 2204 if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { 2205 ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); 2206 if (ret) 2207 
goto err_free_msg; 2208 } 2209 2210 if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2211 ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); 2212 if (ret) 2213 goto err_free_msg; 2214 } 2215 2216 nlmsg_end(msg, nlh); 2217 ib_device_put(device); 2218 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2219 2220 err_free_msg: 2221 nlmsg_free(msg); 2222 err_put_device: 2223 ib_device_put(device); 2224 return ret; 2225 } 2226 2227 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2228 struct netlink_ext_ack *extack) 2229 { 2230 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2231 struct ib_device *device; 2232 struct sk_buff *msg; 2233 u32 index, port, qpn, cntn; 2234 int ret; 2235 2236 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2237 nldev_policy, extack); 2238 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || 2239 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || 2240 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] || 2241 !tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2242 return -EINVAL; 2243 2244 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2245 return -EINVAL; 2246 2247 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2248 device = ib_device_get_by_index(sock_net(skb->sk), index); 2249 if (!device) 2250 return -EINVAL; 2251 2252 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2253 if (!rdma_is_port_valid(device, port)) { 2254 ret = -EINVAL; 2255 goto err; 2256 } 2257 2258 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2259 if (!msg) { 2260 ret = -ENOMEM; 2261 goto err; 2262 } 2263 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2264 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2265 RDMA_NLDEV_CMD_STAT_SET), 2266 0, 0); 2267 if (!nlh) { 2268 ret = -EMSGSIZE; 2269 goto err_fill; 2270 } 2271 2272 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2273 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2274 if (fill_nldev_handle(msg, device) || 2275 nla_put_u32(msg, 
RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2276 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2277 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2278 ret = -EMSGSIZE; 2279 goto err_fill; 2280 } 2281 2282 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); 2283 if (ret) 2284 goto err_fill; 2285 2286 nlmsg_end(msg, nlh); 2287 ib_device_put(device); 2288 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2289 2290 err_fill: 2291 nlmsg_free(msg); 2292 err: 2293 ib_device_put(device); 2294 return ret; 2295 } 2296 2297 static noinline_for_stack int 2298 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, 2299 struct netlink_ext_ack *extack, 2300 struct nlattr *tb[]) 2301 { 2302 struct rdma_hw_stats *stats; 2303 struct nlattr *table_attr; 2304 struct ib_device *device; 2305 int ret, num_cnts, i; 2306 struct sk_buff *msg; 2307 u32 index, port; 2308 u64 v; 2309 2310 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2311 return -EINVAL; 2312 2313 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2314 device = ib_device_get_by_index(sock_net(skb->sk), index); 2315 if (!device) 2316 return -EINVAL; 2317 2318 if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { 2319 ret = -EINVAL; 2320 goto err; 2321 } 2322 2323 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2324 stats = ib_get_hw_stats_port(device, port); 2325 if (!stats) { 2326 ret = -EINVAL; 2327 goto err; 2328 } 2329 2330 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2331 if (!msg) { 2332 ret = -ENOMEM; 2333 goto err; 2334 } 2335 2336 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2337 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2338 RDMA_NLDEV_CMD_STAT_GET), 2339 0, 0); 2340 2341 if (!nlh || fill_nldev_handle(msg, device) || 2342 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2343 ret = -EMSGSIZE; 2344 goto err_msg; 2345 } 2346 2347 mutex_lock(&stats->lock); 2348 2349 num_cnts = device->ops.get_hw_stats(device, 
stats, port, 0); 2350 if (num_cnts < 0) { 2351 ret = -EINVAL; 2352 goto err_stats; 2353 } 2354 2355 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2356 if (!table_attr) { 2357 ret = -EMSGSIZE; 2358 goto err_stats; 2359 } 2360 for (i = 0; i < num_cnts; i++) { 2361 if (test_bit(i, stats->is_disabled)) 2362 continue; 2363 2364 v = stats->value[i] + 2365 rdma_counter_get_hwstat_value(device, port, i); 2366 if (rdma_nl_stat_hwcounter_entry(msg, 2367 stats->descs[i].name, v)) { 2368 ret = -EMSGSIZE; 2369 goto err_table; 2370 } 2371 } 2372 nla_nest_end(msg, table_attr); 2373 2374 mutex_unlock(&stats->lock); 2375 nlmsg_end(msg, nlh); 2376 ib_device_put(device); 2377 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2378 2379 err_table: 2380 nla_nest_cancel(msg, table_attr); 2381 err_stats: 2382 mutex_unlock(&stats->lock); 2383 err_msg: 2384 nlmsg_free(msg); 2385 err: 2386 ib_device_put(device); 2387 return ret; 2388 } 2389 2390 static noinline_for_stack int 2391 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, 2392 struct netlink_ext_ack *extack, struct nlattr *tb[]) 2393 2394 { 2395 static enum rdma_nl_counter_mode mode; 2396 static enum rdma_nl_counter_mask mask; 2397 struct ib_device *device; 2398 struct sk_buff *msg; 2399 u32 index, port; 2400 bool opcnt; 2401 int ret; 2402 2403 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) 2404 return nldev_res_get_counter_doit(skb, nlh, extack); 2405 2406 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] || 2407 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2408 return -EINVAL; 2409 2410 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2411 device = ib_device_get_by_index(sock_net(skb->sk), index); 2412 if (!device) 2413 return -EINVAL; 2414 2415 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2416 if (!rdma_is_port_valid(device, port)) { 2417 ret = -EINVAL; 2418 goto err; 2419 } 2420 2421 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2422 if (!msg) { 2423 ret = -ENOMEM; 
2424 goto err; 2425 } 2426 2427 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2428 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2429 RDMA_NLDEV_CMD_STAT_GET), 2430 0, 0); 2431 if (!nlh) { 2432 ret = -EMSGSIZE; 2433 goto err_msg; 2434 } 2435 2436 ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt); 2437 if (ret) 2438 goto err_msg; 2439 2440 if (fill_nldev_handle(msg, device) || 2441 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2442 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) { 2443 ret = -EMSGSIZE; 2444 goto err_msg; 2445 } 2446 2447 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2448 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) { 2449 ret = -EMSGSIZE; 2450 goto err_msg; 2451 } 2452 2453 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2454 nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) { 2455 ret = -EMSGSIZE; 2456 goto err_msg; 2457 } 2458 2459 nlmsg_end(msg, nlh); 2460 ib_device_put(device); 2461 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2462 2463 err_msg: 2464 nlmsg_free(msg); 2465 err: 2466 ib_device_put(device); 2467 return ret; 2468 } 2469 2470 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2471 struct netlink_ext_ack *extack) 2472 { 2473 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2474 int ret; 2475 2476 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2477 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2478 if (ret) 2479 return -EINVAL; 2480 2481 if (!tb[RDMA_NLDEV_ATTR_STAT_RES]) 2482 return stat_get_doit_default_counter(skb, nlh, extack, tb); 2483 2484 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2485 case RDMA_NLDEV_ATTR_RES_QP: 2486 ret = stat_get_doit_qp(skb, nlh, extack, tb); 2487 break; 2488 case RDMA_NLDEV_ATTR_RES_MR: 2489 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, 2490 fill_stat_mr_entry); 2491 break; 2492 default: 2493 ret = -EINVAL; 2494 break; 2495 } 2496 2497 return ret; 2498 } 2499 2500 static int 
nldev_stat_get_dumpit(struct sk_buff *skb, 2501 struct netlink_callback *cb) 2502 { 2503 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2504 int ret; 2505 2506 ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2507 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2508 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) 2509 return -EINVAL; 2510 2511 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2512 case RDMA_NLDEV_ATTR_RES_QP: 2513 ret = nldev_res_get_counter_dumpit(skb, cb); 2514 break; 2515 case RDMA_NLDEV_ATTR_RES_MR: 2516 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, 2517 fill_stat_mr_entry); 2518 break; 2519 default: 2520 ret = -EINVAL; 2521 break; 2522 } 2523 2524 return ret; 2525 } 2526 2527 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 2528 struct nlmsghdr *nlh, 2529 struct netlink_ext_ack *extack) 2530 { 2531 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; 2532 struct rdma_hw_stats *stats; 2533 struct ib_device *device; 2534 struct sk_buff *msg; 2535 u32 devid, port; 2536 int ret, i; 2537 2538 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2539 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2540 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2541 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2542 return -EINVAL; 2543 2544 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2545 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2546 if (!device) 2547 return -EINVAL; 2548 2549 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2550 if (!rdma_is_port_valid(device, port)) { 2551 ret = -EINVAL; 2552 goto err; 2553 } 2554 2555 stats = ib_get_hw_stats_port(device, port); 2556 if (!stats) { 2557 ret = -EINVAL; 2558 goto err; 2559 } 2560 2561 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2562 if (!msg) { 2563 ret = -ENOMEM; 2564 goto err; 2565 } 2566 2567 nlh = nlmsg_put( 2568 msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2569 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), 2570 0, 0); 2571 2572 ret = -EMSGSIZE; 
2573 if (!nlh || fill_nldev_handle(msg, device) || 2574 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 2575 goto err_msg; 2576 2577 table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2578 if (!table) 2579 goto err_msg; 2580 2581 mutex_lock(&stats->lock); 2582 for (i = 0; i < stats->num_counters; i++) { 2583 entry = nla_nest_start(msg, 2584 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 2585 if (!entry) 2586 goto err_msg_table; 2587 2588 if (nla_put_string(msg, 2589 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 2590 stats->descs[i].name) || 2591 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) 2592 goto err_msg_entry; 2593 2594 if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && 2595 (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, 2596 !test_bit(i, stats->is_disabled)))) 2597 goto err_msg_entry; 2598 2599 nla_nest_end(msg, entry); 2600 } 2601 mutex_unlock(&stats->lock); 2602 2603 nla_nest_end(msg, table); 2604 nlmsg_end(msg, nlh); 2605 ib_device_put(device); 2606 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2607 2608 err_msg_entry: 2609 nla_nest_cancel(msg, entry); 2610 err_msg_table: 2611 mutex_unlock(&stats->lock); 2612 nla_nest_cancel(msg, table); 2613 err_msg: 2614 nlmsg_free(msg); 2615 err: 2616 ib_device_put(device); 2617 return ret; 2618 } 2619 2620 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh, 2621 struct netlink_ext_ack *extack) 2622 { 2623 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2624 enum rdma_nl_dev_type type; 2625 struct ib_device *parent; 2626 char name[IFNAMSIZ] = {}; 2627 u32 parentid; 2628 int ret; 2629 2630 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2631 nldev_policy, extack); 2632 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2633 !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE]) 2634 return -EINVAL; 2635 2636 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name)); 2637 type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]); 2638 parentid = 
nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2639 parent = ib_device_get_by_index(sock_net(skb->sk), parentid); 2640 if (!parent) 2641 return -EINVAL; 2642 2643 ret = ib_add_sub_device(parent, type, name); 2644 ib_device_put(parent); 2645 2646 return ret; 2647 } 2648 2649 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, 2650 struct netlink_ext_ack *extack) 2651 { 2652 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2653 struct ib_device *device; 2654 u32 devid; 2655 int ret; 2656 2657 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2658 nldev_policy, extack); 2659 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2660 return -EINVAL; 2661 2662 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2663 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2664 if (!device) 2665 return -EINVAL; 2666 2667 return ib_del_sub_device_and_put(device); 2668 } 2669 2670 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key) 2671 { 2672 struct nlattr *key_attr; 2673 2674 key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY); 2675 if (!key_attr) 2676 return -EMSGSIZE; 2677 2678 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats)) 2679 goto err; 2680 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS, 2681 key->access_flags)) 2682 goto err; 2683 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY, 2684 key->vendor_key, RDMA_NLDEV_ATTR_PAD)) 2685 goto err; 2686 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS, 2687 key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD)) 2688 goto err; 2689 2690 if (key->kernel_vendor_key && 2691 nla_put_u64_64bit(msg, 2692 RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY, 2693 key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD)) 2694 goto err; 2695 2696 nla_nest_end(msg, key_attr); 2697 return 0; 2698 2699 err: 2700 return -EMSGSIZE; 2701 } 2702 2703 static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool) 2704 { 2705 if 
(fill_frmr_pool_key(msg, &pool->key)) 2706 return -EMSGSIZE; 2707 2708 spin_lock(&pool->lock); 2709 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES, 2710 pool->queue.ci + pool->inactive_queue.ci)) 2711 goto err_unlock; 2712 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE, 2713 pool->max_in_use, RDMA_NLDEV_ATTR_PAD)) 2714 goto err_unlock; 2715 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE, 2716 pool->in_use, RDMA_NLDEV_ATTR_PAD)) 2717 goto err_unlock; 2718 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES, 2719 pool->pinned_handles)) 2720 goto err_unlock; 2721 spin_unlock(&pool->lock); 2722 2723 return 0; 2724 2725 err_unlock: 2726 spin_unlock(&pool->lock); 2727 return -EMSGSIZE; 2728 } 2729 2730 static int nldev_frmr_pools_parse_key(struct nlattr *tb[], 2731 struct ib_frmr_key *key, 2732 struct netlink_ext_ack *extack) 2733 { 2734 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]) 2735 key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]); 2736 2737 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]) 2738 key->access_flags = nla_get_u32( 2739 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]); 2740 2741 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]) 2742 key->vendor_key = nla_get_u64( 2743 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]); 2744 2745 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]) 2746 key->num_dma_blocks = nla_get_u64( 2747 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]); 2748 2749 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY]) 2750 return -EINVAL; 2751 2752 return 0; 2753 } 2754 2755 static int nldev_frmr_pools_set_pinned(struct ib_device *device, 2756 struct nlattr *tb[], 2757 struct netlink_ext_ack *extack) 2758 { 2759 struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX]; 2760 struct ib_frmr_key key = { 0 }; 2761 u32 pinned_handles = 0; 2762 int err = 0; 2763 2764 pinned_handles = 2765 nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]); 2766 2767 if 
(!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]) 2768 return -EINVAL; 2769 2770 err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1, 2771 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy, 2772 extack); 2773 if (err) 2774 return err; 2775 2776 err = nldev_frmr_pools_parse_key(key_tb, &key, extack); 2777 if (err) 2778 return err; 2779 2780 err = ib_frmr_pools_set_pinned(device, &key, pinned_handles); 2781 2782 return err; 2783 } 2784 2785 static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb, 2786 struct netlink_callback *cb) 2787 { 2788 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2789 struct ib_frmr_pools *pools; 2790 int err, ret = 0, idx = 0; 2791 struct ib_frmr_pool *pool; 2792 struct nlattr *table_attr; 2793 struct nlattr *entry_attr; 2794 bool show_details = false; 2795 struct ib_device *device; 2796 int start = cb->args[0]; 2797 struct rb_node *node; 2798 struct nlmsghdr *nlh; 2799 bool filled = false; 2800 2801 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2802 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2803 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2804 return -EINVAL; 2805 2806 device = ib_device_get_by_index( 2807 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2808 if (!device) 2809 return -EINVAL; 2810 2811 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 2812 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 2813 2814 pools = device->frmr_pools; 2815 if (!pools) { 2816 ib_device_put(device); 2817 return 0; 2818 } 2819 2820 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2821 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2822 RDMA_NLDEV_CMD_FRMR_POOLS_GET), 2823 0, NLM_F_MULTI); 2824 2825 if (!nlh || fill_nldev_handle(skb, device)) { 2826 ret = -EMSGSIZE; 2827 goto err; 2828 } 2829 2830 table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS); 2831 if (!table_attr) { 2832 ret = -EMSGSIZE; 2833 goto err; 2834 } 2835 2836 read_lock(&pools->rb_lock); 2837 for (node = rb_first(&pools->rb_root); node; node = 
rb_next(node)) { 2838 pool = rb_entry(node, struct ib_frmr_pool, node); 2839 if (pool->key.kernel_vendor_key && !show_details) 2840 continue; 2841 2842 if (idx < start) { 2843 idx++; 2844 continue; 2845 } 2846 2847 filled = true; 2848 2849 entry_attr = nla_nest_start_noflag( 2850 skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY); 2851 if (!entry_attr) { 2852 ret = -EMSGSIZE; 2853 goto end_msg; 2854 } 2855 2856 if (fill_frmr_pool_entry(skb, pool)) { 2857 nla_nest_cancel(skb, entry_attr); 2858 ret = -EMSGSIZE; 2859 goto end_msg; 2860 } 2861 2862 nla_nest_end(skb, entry_attr); 2863 idx++; 2864 } 2865 end_msg: 2866 read_unlock(&pools->rb_lock); 2867 2868 nla_nest_end(skb, table_attr); 2869 nlmsg_end(skb, nlh); 2870 cb->args[0] = idx; 2871 2872 /* 2873 * No more entries to fill, cancel the message and 2874 * return 0 to mark end of dumpit. 2875 */ 2876 if (!filled) 2877 goto err; 2878 2879 ib_device_put(device); 2880 return skb->len; 2881 2882 err: 2883 nlmsg_cancel(skb, nlh); 2884 ib_device_put(device); 2885 return ret; 2886 } 2887 2888 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2889 struct netlink_ext_ack *extack) 2890 { 2891 struct ib_device *device; 2892 struct nlattr **tb; 2893 u32 aging_period; 2894 int err; 2895 2896 tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL); 2897 if (!tb) 2898 return -ENOMEM; 2899 2900 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2901 extack); 2902 if (err) 2903 goto free_tb; 2904 2905 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 2906 err = -EINVAL; 2907 goto free_tb; 2908 } 2909 2910 device = ib_device_get_by_index( 2911 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2912 if (!device) { 2913 err = -EINVAL; 2914 goto free_tb; 2915 } 2916 2917 if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) { 2918 aging_period = nla_get_u32( 2919 tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]); 2920 err = ib_frmr_pools_set_aging_period(device, aging_period); 2921 goto done; 2922 } 2923 2924 if 
(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]) 2925 err = nldev_frmr_pools_set_pinned(device, tb, extack); 2926 2927 done: 2928 ib_device_put(device); 2929 free_tb: 2930 kfree(tb); 2931 return err; 2932 } 2933 2934 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 2935 [RDMA_NLDEV_CMD_GET] = { 2936 .doit = nldev_get_doit, 2937 .dump = nldev_get_dumpit, 2938 }, 2939 [RDMA_NLDEV_CMD_GET_CHARDEV] = { 2940 .doit = nldev_get_chardev, 2941 }, 2942 [RDMA_NLDEV_CMD_SET] = { 2943 .doit = nldev_set_doit, 2944 .flags = RDMA_NL_ADMIN_PERM, 2945 }, 2946 [RDMA_NLDEV_CMD_NEWLINK] = { 2947 .doit = nldev_newlink, 2948 .flags = RDMA_NL_ADMIN_PERM, 2949 }, 2950 [RDMA_NLDEV_CMD_DELLINK] = { 2951 .doit = nldev_dellink, 2952 .flags = RDMA_NL_ADMIN_PERM, 2953 }, 2954 [RDMA_NLDEV_CMD_PORT_GET] = { 2955 .doit = nldev_port_get_doit, 2956 .dump = nldev_port_get_dumpit, 2957 }, 2958 [RDMA_NLDEV_CMD_RES_GET] = { 2959 .doit = nldev_res_get_doit, 2960 .dump = nldev_res_get_dumpit, 2961 }, 2962 [RDMA_NLDEV_CMD_RES_QP_GET] = { 2963 .doit = nldev_res_get_qp_doit, 2964 .dump = nldev_res_get_qp_dumpit, 2965 }, 2966 [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { 2967 .doit = nldev_res_get_cm_id_doit, 2968 .dump = nldev_res_get_cm_id_dumpit, 2969 }, 2970 [RDMA_NLDEV_CMD_RES_CQ_GET] = { 2971 .doit = nldev_res_get_cq_doit, 2972 .dump = nldev_res_get_cq_dumpit, 2973 }, 2974 [RDMA_NLDEV_CMD_RES_MR_GET] = { 2975 .doit = nldev_res_get_mr_doit, 2976 .dump = nldev_res_get_mr_dumpit, 2977 }, 2978 [RDMA_NLDEV_CMD_RES_PD_GET] = { 2979 .doit = nldev_res_get_pd_doit, 2980 .dump = nldev_res_get_pd_dumpit, 2981 }, 2982 [RDMA_NLDEV_CMD_RES_CTX_GET] = { 2983 .doit = nldev_res_get_ctx_doit, 2984 .dump = nldev_res_get_ctx_dumpit, 2985 }, 2986 [RDMA_NLDEV_CMD_RES_SRQ_GET] = { 2987 .doit = nldev_res_get_srq_doit, 2988 .dump = nldev_res_get_srq_dumpit, 2989 }, 2990 [RDMA_NLDEV_CMD_SYS_GET] = { 2991 .doit = nldev_sys_get_doit, 2992 }, 2993 [RDMA_NLDEV_CMD_SYS_SET] = { 2994 .doit = nldev_set_sys_set_doit, 2995 .flags 
= RDMA_NL_ADMIN_PERM, 2996 }, 2997 [RDMA_NLDEV_CMD_STAT_SET] = { 2998 .doit = nldev_stat_set_doit, 2999 .flags = RDMA_NL_ADMIN_PERM, 3000 }, 3001 [RDMA_NLDEV_CMD_STAT_GET] = { 3002 .doit = nldev_stat_get_doit, 3003 .dump = nldev_stat_get_dumpit, 3004 }, 3005 [RDMA_NLDEV_CMD_STAT_DEL] = { 3006 .doit = nldev_stat_del_doit, 3007 .flags = RDMA_NL_ADMIN_PERM, 3008 }, 3009 [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { 3010 .doit = nldev_res_get_qp_raw_doit, 3011 .dump = nldev_res_get_qp_raw_dumpit, 3012 .flags = RDMA_NL_ADMIN_PERM, 3013 }, 3014 [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { 3015 .doit = nldev_res_get_cq_raw_doit, 3016 .dump = nldev_res_get_cq_raw_dumpit, 3017 .flags = RDMA_NL_ADMIN_PERM, 3018 }, 3019 [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { 3020 .doit = nldev_res_get_mr_raw_doit, 3021 .dump = nldev_res_get_mr_raw_dumpit, 3022 .flags = RDMA_NL_ADMIN_PERM, 3023 }, 3024 [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = { 3025 .doit = nldev_res_get_srq_raw_doit, 3026 .dump = nldev_res_get_srq_raw_dumpit, 3027 .flags = RDMA_NL_ADMIN_PERM, 3028 }, 3029 [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { 3030 .doit = nldev_stat_get_counter_status_doit, 3031 }, 3032 [RDMA_NLDEV_CMD_NEWDEV] = { 3033 .doit = nldev_newdev, 3034 .flags = RDMA_NL_ADMIN_PERM, 3035 }, 3036 [RDMA_NLDEV_CMD_DELDEV] = { 3037 .doit = nldev_deldev, 3038 .flags = RDMA_NL_ADMIN_PERM, 3039 }, 3040 [RDMA_NLDEV_CMD_FRMR_POOLS_GET] = { 3041 .dump = nldev_frmr_pools_get_dumpit, 3042 }, 3043 [RDMA_NLDEV_CMD_FRMR_POOLS_SET] = { 3044 .doit = nldev_frmr_pools_set_doit, 3045 .flags = RDMA_NL_ADMIN_PERM, 3046 }, 3047 }; 3048 3049 static int fill_mon_netdev_rename(struct sk_buff *msg, 3050 struct ib_device *device, u32 port, 3051 const struct net *net) 3052 { 3053 struct net_device *netdev = ib_device_get_netdev(device, port); 3054 int ret = 0; 3055 3056 if (!netdev || !net_eq(dev_net(netdev), net)) 3057 goto out; 3058 3059 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3060 if (ret) 3061 goto out; 3062 ret = nla_put_string(msg, 
RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3063 out: 3064 dev_put(netdev); 3065 return ret; 3066 } 3067 3068 static int fill_mon_netdev_association(struct sk_buff *msg, 3069 struct ib_device *device, u32 port, 3070 const struct net *net) 3071 { 3072 struct net_device *netdev = ib_device_get_netdev(device, port); 3073 int ret = 0; 3074 3075 if (netdev && !net_eq(dev_net(netdev), net)) 3076 goto out; 3077 3078 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); 3079 if (ret) 3080 goto out; 3081 3082 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 3083 dev_name(&device->dev)); 3084 if (ret) 3085 goto out; 3086 3087 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); 3088 if (ret) 3089 goto out; 3090 3091 if (netdev) { 3092 ret = nla_put_u32(msg, 3093 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3094 if (ret) 3095 goto out; 3096 3097 ret = nla_put_string(msg, 3098 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3099 } 3100 3101 out: 3102 dev_put(netdev); 3103 return ret; 3104 } 3105 3106 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, 3107 enum rdma_nl_notify_event_type type) 3108 { 3109 struct net_device *netdev; 3110 3111 switch (type) { 3112 case RDMA_REGISTER_EVENT: 3113 dev_warn_ratelimited(&device->dev, 3114 "Failed to send RDMA monitor register device event\n"); 3115 break; 3116 case RDMA_UNREGISTER_EVENT: 3117 dev_warn_ratelimited(&device->dev, 3118 "Failed to send RDMA monitor unregister device event\n"); 3119 break; 3120 case RDMA_NETDEV_ATTACH_EVENT: 3121 netdev = ib_device_get_netdev(device, port_num); 3122 dev_warn_ratelimited(&device->dev, 3123 "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", 3124 port_num, netdev->ifindex); 3125 dev_put(netdev); 3126 break; 3127 case RDMA_NETDEV_DETACH_EVENT: 3128 dev_warn_ratelimited(&device->dev, 3129 "Failed to send RDMA monitor netdev detach event: port %d\n", 3130 port_num); 3131 break; 3132 case RDMA_RENAME_EVENT: 3133 
dev_warn_ratelimited(&device->dev, 3134 "Failed to send RDMA monitor rename device event\n"); 3135 break; 3136 3137 case RDMA_NETDEV_RENAME_EVENT: 3138 netdev = ib_device_get_netdev(device, port_num); 3139 dev_warn_ratelimited(&device->dev, 3140 "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", 3141 port_num, netdev->ifindex); 3142 dev_put(netdev); 3143 break; 3144 default: 3145 break; 3146 } 3147 } 3148 3149 int rdma_nl_notify_event(struct ib_device *device, u32 port_num, 3150 enum rdma_nl_notify_event_type type) 3151 { 3152 struct sk_buff *skb; 3153 int ret = -EMSGSIZE; 3154 struct net *net; 3155 void *nlh; 3156 3157 net = read_pnet(&device->coredev.rdma_net); 3158 if (!net) 3159 return -EINVAL; 3160 3161 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3162 if (!skb) 3163 return -ENOMEM; 3164 nlh = nlmsg_put(skb, 0, 0, 3165 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), 3166 0, 0); 3167 if (!nlh) 3168 goto err_free; 3169 3170 switch (type) { 3171 case RDMA_REGISTER_EVENT: 3172 case RDMA_UNREGISTER_EVENT: 3173 case RDMA_RENAME_EVENT: 3174 ret = fill_nldev_handle(skb, device); 3175 if (ret) 3176 goto err_free; 3177 break; 3178 case RDMA_NETDEV_ATTACH_EVENT: 3179 case RDMA_NETDEV_DETACH_EVENT: 3180 ret = fill_mon_netdev_association(skb, device, port_num, net); 3181 if (ret) 3182 goto err_free; 3183 break; 3184 case RDMA_NETDEV_RENAME_EVENT: 3185 ret = fill_mon_netdev_rename(skb, device, port_num, net); 3186 if (ret) 3187 goto err_free; 3188 break; 3189 default: 3190 break; 3191 } 3192 3193 ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); 3194 if (ret) 3195 goto err_free; 3196 3197 nlmsg_end(skb, nlh); 3198 ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); 3199 if (ret && ret != -ESRCH) { 3200 skb = NULL; /* skb is freed in the netlink send-op handling */ 3201 goto err_free; 3202 } 3203 return 0; 3204 3205 err_free: 3206 rdma_nl_notify_err_msg(device, port_num, type); 3207 nlmsg_free(skb); 3208 return ret; 
3209 } 3210 3211 void __init nldev_init(void) 3212 { 3213 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); 3214 } 3215 3216 void nldev_exit(void) 3217 { 3218 rdma_nl_unregister(RDMA_NL_NLDEV); 3219 } 3220 3221 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5); 3222