1 /* 2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Neither the names of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * Alternatively, this software may be distributed under the terms of the 17 * GNU General Public License ("GPL") version 2 as published by the Free 18 * Software Foundation. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <linux/module.h> 34 #include <linux/pid.h> 35 #include <linux/pid_namespace.h> 36 #include <linux/mutex.h> 37 #include <net/netlink.h> 38 #include <rdma/rdma_cm.h> 39 #include <rdma/rdma_netlink.h> 40 #include <rdma/frmr_pools.h> 41 42 #include "core_priv.h" 43 #include "cma_priv.h" 44 #include "restrack.h" 45 #include "uverbs.h" 46 #include "frmr_pools.h" 47 48 /* 49 * This determines whether a non-privileged user is allowed to specify a 50 * controlled QKEY or not, when true non-privileged user is allowed to specify 51 * a controlled QKEY. 52 */ 53 static bool privileged_qkey; 54 static DEFINE_MUTEX(nldev_dellink_mutex); 55 56 typedef int (*res_fill_func_t)(struct sk_buff*, bool, 57 struct rdma_restrack_entry*, uint32_t); 58 59 /* 60 * Sort array elements by the netlink attribute name 61 */ 62 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { 63 [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 }, 64 [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 }, 65 [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING, 66 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 67 [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING, 68 .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE }, 69 [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 }, 70 [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, 71 [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, 72 .len = IB_DEVICE_NAME_MAX }, 73 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, 74 [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, 75 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 76 [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, 77 [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, 78 [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, 79 [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, 80 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 81 [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, 82 [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, 83 [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, 84 [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, 85 [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, 86 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 87 [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, 88 [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, 89 .len = IFNAMSIZ }, 90 [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, 91 [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, 92 [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, 93 .len = IFNAMSIZ }, 94 [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, 95 [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, 96 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, 97 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, 98 [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, 99 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, 100 [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, 101 [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, 102 [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, 103 [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, 104 [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, 105 [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED }, 106 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, 107 [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED }, 108 [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { 109 .len = sizeof(struct __kernel_sockaddr_storage) }, 110 [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, 111 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, 112 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 113 [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, 114 [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, 115 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, 116 [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, 117 [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, 118 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, 119 [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, 120 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, 121 [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, 122 [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 }, 123 [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, 124 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, 125 [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, 126 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, 127 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, 128 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, 129 [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, 130 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, 131 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, 132 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, 133 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, 134 [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { 135 .len = sizeof(struct __kernel_sockaddr_storage) }, 136 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, 137 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, 138 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, 139 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 }, 140 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, 141 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 142 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, 143 [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING, 144 .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, 145 [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, 146 [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, 147 [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, 148 [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 }, 149 [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED }, 150 [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 }, 151 [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 }, 152 [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, 153 [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, 154 [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 }, 155 [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 }, 156 [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 }, 157 [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED }, 158 [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED }, 159 [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 }, 160 [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED }, 161 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED }, 162 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING }, 163 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 }, 164 [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, 165 [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, 166 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, 167 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, 168 [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, 169 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, 170 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, 171 [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 }, 172 [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 }, 173 [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 }, 174 [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING }, 175 [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, 176 [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, 177 [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 }, 178 [RDMA_NLDEV_ATTR_FRMR_POOLS] = { .type = NLA_NESTED }, 179 [RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY] = { .type = NLA_NESTED }, 180 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY] = { .type = NLA_NESTED }, 181 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS] = { .type = NLA_U8 }, 182 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 }, 183 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 }, 184 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 }, 185 [RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 }, 186 [RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE] = { .type = NLA_U64 }, 187 [RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE] = { .type = NLA_U64 }, 188 [RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 }, 189 [RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 }, 190 [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 }, 191 }; 192 193 static int put_driver_name_print_type(struct sk_buff *msg, const char *name, 194 enum rdma_nldev_print_type print_type) 195 { 196 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name)) 197 return -EMSGSIZE; 198 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC && 199 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type)) 200 return -EMSGSIZE; 201 202 return 0; 203 } 204 205 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, 206 enum rdma_nldev_print_type print_type, 207 u32 value) 208 { 209 if (put_driver_name_print_type(msg, name, print_type)) 210 return -EMSGSIZE; 211 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value)) 212 return -EMSGSIZE; 213 214 return 0; 215 } 216 217 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, 218 enum rdma_nldev_print_type print_type, 219 u64 value) 220 { 221 if (put_driver_name_print_type(msg, name, print_type)) 222 return -EMSGSIZE; 223 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value, 224 RDMA_NLDEV_ATTR_PAD)) 225 return -EMSGSIZE; 226 227 return 0; 228 } 229 230 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, 231 const char *str) 232 { 233 if (put_driver_name_print_type(msg, name, 234 RDMA_NLDEV_PRINT_TYPE_UNSPEC)) 235 return -EMSGSIZE; 236 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) 237 return -EMSGSIZE; 238 239 return 0; 240 } 241 EXPORT_SYMBOL(rdma_nl_put_driver_string); 242 243 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) 244 { 245 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 246 value); 247 } 248 EXPORT_SYMBOL(rdma_nl_put_driver_u32); 249 250 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, 251 u32 value) 252 { 253 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 254 value); 255 } 256 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex); 257 258 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value) 259 { 260 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, 261 value); 262 } 263 EXPORT_SYMBOL(rdma_nl_put_driver_u64); 264 265 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value) 266 { 267 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, 268 value); 269 } 270 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex); 271 272 bool rdma_nl_get_privileged_qkey(void) 273 { 274 return privileged_qkey; 275 } 276 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey); 277 278 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) 279 { 280 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 281 return -EMSGSIZE; 282 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 283 dev_name(&device->dev))) 284 return -EMSGSIZE; 285 286 return 0; 287 } 288 289 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) 290 { 291 char fw[IB_FW_VERSION_NAME_MAX]; 292 int ret = 0; 293 u32 port; 294 295 if (fill_nldev_handle(msg, device)) 296 return -EMSGSIZE; 297 298 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) 299 return -EMSGSIZE; 300 301 BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); 302 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 303 device->attrs.device_cap_flags, 304 RDMA_NLDEV_ATTR_PAD)) 305 return -EMSGSIZE; 306 307 ib_get_device_fw_str(device, fw); 308 /* Device without FW has strlen(fw) = 0 */ 309 if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) 310 return -EMSGSIZE; 311 312 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, 313 be64_to_cpu(device->node_guid), 314 RDMA_NLDEV_ATTR_PAD)) 315 return -EMSGSIZE; 316 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, 317 be64_to_cpu(device->attrs.sys_image_guid), 318 RDMA_NLDEV_ATTR_PAD)) 319 return -EMSGSIZE; 320 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) 321 return -EMSGSIZE; 322 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim)) 323 return -EMSGSIZE; 324 325 if (device->type && 326 nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type)) 327 return -EMSGSIZE; 328 329 if (device->parent && 330 nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME, 331 dev_name(&device->parent->dev))) 332 return -EMSGSIZE; 333 334 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, 335 device->name_assign_type)) 336 return -EMSGSIZE; 337 338 /* 339 * Link type is determined on first port and mlx4 device 340 * which can potentially have two different link type for the same 341 * IB device is considered as better to be avoided in the future, 342 */ 343 port = rdma_start_port(device); 344 if (rdma_cap_opa_mad(device, port)) 345 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa"); 346 else if (rdma_protocol_ib(device, port)) 347 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib"); 348 else if (rdma_protocol_iwarp(device, port)) 349 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw"); 350 else if (rdma_protocol_roce(device, port)) 351 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce"); 352 else if (rdma_protocol_usnic(device, port)) 353 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, 354 "usnic"); 355 return ret; 356 } 357 358 static int fill_port_info(struct sk_buff *msg, 359 struct ib_device *device, u32 port, 360 const struct net *net) 361 { 362 struct net_device *netdev = NULL; 363 struct ib_port_attr attr; 364 int ret; 365 u64 cap_flags = 0; 366 367 if (fill_nldev_handle(msg, device)) 368 return -EMSGSIZE; 369 370 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 371 return -EMSGSIZE; 372 373 ret = ib_query_port(device, port, &attr); 374 if (ret) 375 return ret; 376 377 if (rdma_protocol_ib(device, port)) { 378 BUILD_BUG_ON((sizeof(attr.port_cap_flags) + 379 sizeof(attr.port_cap_flags2)) > sizeof(u64)); 380 cap_flags = attr.port_cap_flags | 381 ((u64)attr.port_cap_flags2 << 32); 382 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, 383 cap_flags, RDMA_NLDEV_ATTR_PAD)) 384 return -EMSGSIZE; 385 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, 386 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) 387 return -EMSGSIZE; 388 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) 389 return -EMSGSIZE; 390 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) 391 return -EMSGSIZE; 392 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) 393 return -EMSGSIZE; 394 } 395 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) 396 return -EMSGSIZE; 397 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) 398 return -EMSGSIZE; 399 400 netdev = ib_device_get_netdev(device, port); 401 if (netdev && net_eq(dev_net(netdev), net)) { 402 ret = nla_put_u32(msg, 403 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 404 if (ret) 405 goto out; 406 ret = nla_put_string(msg, 407 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 408 } 409 410 out: 411 dev_put(netdev); 412 return ret; 413 } 414 415 static int fill_res_info_entry(struct sk_buff *msg, 416 const char *name, u64 curr) 417 { 418 struct nlattr *entry_attr; 419 420 entry_attr = nla_nest_start_noflag(msg, 421 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); 422 if (!entry_attr) 423 return -EMSGSIZE; 424 425 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name)) 426 goto err; 427 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 428 RDMA_NLDEV_ATTR_PAD)) 429 goto err; 430 431 nla_nest_end(msg, entry_attr); 432 return 0; 433 434 err: 435 nla_nest_cancel(msg, entry_attr); 436 return -EMSGSIZE; 437 } 438 439 static int fill_res_info(struct sk_buff *msg, struct ib_device *device, 440 bool show_details) 441 { 442 static const char * const names[RDMA_RESTRACK_MAX] = { 443 [RDMA_RESTRACK_PD] = "pd", 444 [RDMA_RESTRACK_CQ] = "cq", 445 [RDMA_RESTRACK_QP] = "qp", 446 [RDMA_RESTRACK_CM_ID] = "cm_id", 447 [RDMA_RESTRACK_MR] = "mr", 448 [RDMA_RESTRACK_CTX] = "ctx", 449 [RDMA_RESTRACK_SRQ] = "srq", 450 }; 451 452 struct nlattr *table_attr; 453 int ret, i, curr; 454 455 if (fill_nldev_handle(msg, device)) 456 return -EMSGSIZE; 457 458 table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); 459 if (!table_attr) 460 return -EMSGSIZE; 461 462 for (i = 0; i < RDMA_RESTRACK_MAX; i++) { 463 if (!names[i]) 464 continue; 465 curr = rdma_restrack_count(device, i, show_details); 466 ret = fill_res_info_entry(msg, names[i], curr); 467 if (ret) 468 goto err; 469 } 470 471 nla_nest_end(msg, table_attr); 472 return 0; 473 474 err: 475 nla_nest_cancel(msg, table_attr); 476 return ret; 477 } 478 479 static int fill_res_name_pid(struct sk_buff *msg, 480 struct rdma_restrack_entry *res) 481 { 482 int err = 0; 483 484 /* 485 * For user resources, user is should read /proc/PID/comm to get the 486 * name of the task file. 487 */ 488 if (rdma_is_kernel_res(res)) { 489 err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, 490 res->kern_name); 491 } else { 492 pid_t pid; 493 494 pid = task_pid_vnr(res->task); 495 /* 496 * Task is dead and in zombie state. 497 * There is no need to print PID anymore. 498 */ 499 if (pid) 500 /* 501 * This part is racy, task can be killed and PID will 502 * be zero right here but it is ok, next query won't 503 * return PID. We don't promise real-time reflection 504 * of SW objects. 505 */ 506 err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); 507 } 508 509 return err ? -EMSGSIZE : 0; 510 } 511 512 static int fill_res_qp_entry_query(struct sk_buff *msg, 513 struct rdma_restrack_entry *res, 514 struct ib_device *dev, 515 struct ib_qp *qp) 516 { 517 struct ib_qp_init_attr qp_init_attr; 518 struct ib_qp_attr qp_attr; 519 int ret; 520 521 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr); 522 if (ret) 523 return ret; 524 525 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { 526 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, 527 qp_attr.dest_qp_num)) 528 goto err; 529 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN, 530 qp_attr.rq_psn)) 531 goto err; 532 } 533 534 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn)) 535 goto err; 536 537 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC || 538 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) { 539 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, 540 qp_attr.path_mig_state)) 541 goto err; 542 } 543 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type)) 544 goto err; 545 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) 546 goto err; 547 548 if (dev->ops.fill_res_qp_entry) 549 return dev->ops.fill_res_qp_entry(msg, qp); 550 return 0; 551 552 err: return -EMSGSIZE; 553 } 554 555 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, 556 struct rdma_restrack_entry *res, uint32_t port) 557 { 558 struct ib_qp *qp = container_of(res, struct ib_qp, res); 559 struct ib_device *dev = qp->device; 560 int ret; 561 562 if (port && port != qp->port) 563 return -EAGAIN; 564 565 /* In create_qp() port is not set yet */ 566 if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) 567 return -EMSGSIZE; 568 569 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); 570 if (ret) 571 return -EMSGSIZE; 572 573 if (!rdma_is_kernel_res(res) && 574 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) 575 return -EMSGSIZE; 576 577 ret = fill_res_name_pid(msg, res); 578 if (ret) 579 return -EMSGSIZE; 580 581 return fill_res_qp_entry_query(msg, res, dev, qp); 582 } 583 584 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 585 struct rdma_restrack_entry *res, uint32_t port) 586 { 587 struct ib_qp *qp = container_of(res, struct ib_qp, res); 588 struct ib_device *dev = qp->device; 589 590 if (port && port != qp->port) 591 return -EAGAIN; 592 if (!dev->ops.fill_res_qp_entry_raw) 593 return -EINVAL; 594 return dev->ops.fill_res_qp_entry_raw(msg, qp); 595 } 596 597 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, 598 struct rdma_restrack_entry *res, uint32_t port) 599 { 600 struct rdma_id_private *id_priv = 601 container_of(res, struct rdma_id_private, res); 602 struct ib_device *dev = id_priv->id.device; 603 struct rdma_cm_id *cm_id = &id_priv->id; 604 605 if (port && port != cm_id->port_num) 606 return -EAGAIN; 607 608 if (cm_id->port_num && 609 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) 610 goto err; 611 612 if (id_priv->qp_num) { 613 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) 614 goto err; 615 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) 616 goto err; 617 } 618 619 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) 620 goto err; 621 622 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) 623 goto err; 624 625 if (cm_id->route.addr.src_addr.ss_family && 626 nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, 627 sizeof(cm_id->route.addr.src_addr), 628 &cm_id->route.addr.src_addr)) 629 goto err; 630 if (cm_id->route.addr.dst_addr.ss_family && 631 nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, 632 sizeof(cm_id->route.addr.dst_addr), 633 &cm_id->route.addr.dst_addr)) 634 goto err; 635 636 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id)) 637 goto err; 638 639 if (fill_res_name_pid(msg, res)) 640 goto err; 641 642 if (dev->ops.fill_res_cm_id_entry) 643 return dev->ops.fill_res_cm_id_entry(msg, cm_id); 644 return 0; 645 646 err: return -EMSGSIZE; 647 } 648 649 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, 650 struct rdma_restrack_entry *res, uint32_t port) 651 { 652 struct ib_cq *cq = container_of(res, struct ib_cq, res); 653 struct ib_device *dev = cq->device; 654 655 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) 656 return -EMSGSIZE; 657 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 658 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) 659 return -EMSGSIZE; 660 661 /* Poll context is only valid for kernel CQs */ 662 if (rdma_is_kernel_res(res) && 663 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) 664 return -EMSGSIZE; 665 666 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) 667 return -EMSGSIZE; 668 669 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) 670 return -EMSGSIZE; 671 if (!rdma_is_kernel_res(res) && 672 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 673 cq->uobject->uevent.uobject.context->res.id)) 674 return -EMSGSIZE; 675 676 if (fill_res_name_pid(msg, res)) 677 return -EMSGSIZE; 678 679 return (dev->ops.fill_res_cq_entry) ? 680 dev->ops.fill_res_cq_entry(msg, cq) : 0; 681 } 682 683 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 684 struct rdma_restrack_entry *res, uint32_t port) 685 { 686 struct ib_cq *cq = container_of(res, struct ib_cq, res); 687 struct ib_device *dev = cq->device; 688 689 if (!dev->ops.fill_res_cq_entry_raw) 690 return -EINVAL; 691 return dev->ops.fill_res_cq_entry_raw(msg, cq); 692 } 693 694 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 695 struct rdma_restrack_entry *res, uint32_t port) 696 { 697 struct ib_mr *mr = container_of(res, struct ib_mr, res); 698 struct ib_device *dev = mr->device; 699 700 if (has_cap_net_admin) { 701 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) 702 return -EMSGSIZE; 703 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) 704 return -EMSGSIZE; 705 } 706 707 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 708 RDMA_NLDEV_ATTR_PAD)) 709 return -EMSGSIZE; 710 711 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 712 return -EMSGSIZE; 713 714 if (!rdma_is_kernel_res(res)) { 715 struct ib_pd *pd = READ_ONCE(mr->pd); 716 717 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, pd->res.id)) 718 return -EMSGSIZE; 719 } 720 721 if (fill_res_name_pid(msg, res)) 722 return -EMSGSIZE; 723 724 return (dev->ops.fill_res_mr_entry) ? 725 dev->ops.fill_res_mr_entry(msg, mr) : 726 0; 727 } 728 729 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 730 struct rdma_restrack_entry *res, uint32_t port) 731 { 732 struct ib_mr *mr = container_of(res, struct ib_mr, res); 733 struct ib_device *dev = mr->device; 734 735 if (!dev->ops.fill_res_mr_entry_raw) 736 return -EINVAL; 737 return dev->ops.fill_res_mr_entry_raw(msg, mr); 738 } 739 740 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, 741 struct rdma_restrack_entry *res, uint32_t port) 742 { 743 struct ib_pd *pd = container_of(res, struct ib_pd, res); 744 745 if (has_cap_net_admin) { 746 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, 747 pd->local_dma_lkey)) 748 goto err; 749 if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && 750 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, 751 pd->unsafe_global_rkey)) 752 goto err; 753 } 754 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 755 atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) 756 goto err; 757 758 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id)) 759 goto err; 760 761 if (!rdma_is_kernel_res(res) && 762 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 763 pd->uobject->context->res.id)) 764 goto err; 765 766 return fill_res_name_pid(msg, res); 767 768 err: return -EMSGSIZE; 769 } 770 771 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin, 772 struct rdma_restrack_entry *res, uint32_t port) 773 { 774 struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res); 775 776 if (rdma_is_kernel_res(res)) 777 return 0; 778 779 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id)) 780 return -EMSGSIZE; 781 782 return fill_res_name_pid(msg, res); 783 } 784 785 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range, 786 uint32_t max_range) 787 { 788 struct nlattr *entry_attr; 789 790 if (!min_range) 791 return 0; 792 793 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); 794 if (!entry_attr) 795 return -EMSGSIZE; 796 797 if (min_range == max_range) { 798 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range)) 799 goto err; 800 } else { 801 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range)) 802 goto err; 803 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range)) 804 goto err; 805 } 806 nla_nest_end(msg, entry_attr); 807 return 0; 808 809 err: 810 nla_nest_cancel(msg, entry_attr); 811 return -EMSGSIZE; 812 } 813 814 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq) 815 { 816 uint32_t min_range = 0, prev = 0; 817 struct rdma_restrack_entry *res; 818 struct rdma_restrack_root *rt; 819 struct nlattr *table_attr; 820 struct ib_qp *qp = NULL; 821 unsigned long id = 0; 822 823 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 824 if (!table_attr) 825 return -EMSGSIZE; 826 827 rt = &srq->device->res[RDMA_RESTRACK_QP]; 828 xa_lock(&rt->xa); 829 xa_for_each(&rt->xa, id, res) { 830 if (!rdma_restrack_get(res)) 831 continue; 832 833 qp = container_of(res, struct ib_qp, res); 834 if (!qp->srq || (qp->srq->res.id != srq->res.id)) { 835 rdma_restrack_put(res); 836 continue; 837 } 838 839 if (qp->qp_num < prev) 840 /* qp_num should be ascending */ 841 goto err_loop; 842 843 if (min_range == 0) { 844 min_range = qp->qp_num; 845 } else if (qp->qp_num > (prev + 1)) { 846 if (fill_res_range_qp_entry(msg, min_range, prev)) 847 goto err_loop; 848 849 min_range = qp->qp_num; 850 } 851 prev = qp->qp_num; 852 rdma_restrack_put(res); 853 } 854 855 xa_unlock(&rt->xa); 856 857 if (fill_res_range_qp_entry(msg, min_range, prev)) 858 goto err; 859 860 nla_nest_end(msg, table_attr); 861 return 0; 862 863 err_loop: 864 rdma_restrack_put(res); 865 xa_unlock(&rt->xa); 866 err: 867 nla_nest_cancel(msg, table_attr); 868 return -EMSGSIZE; 869 } 870 871 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin, 872 struct rdma_restrack_entry *res, uint32_t port) 873 { 874 struct ib_srq *srq = container_of(res, struct ib_srq, res); 875 struct ib_device *dev = srq->device; 876 877 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id)) 878 goto err; 879 880 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type)) 881 goto err; 882 883 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id)) 884 goto err; 885 886 if (ib_srq_has_cq(srq->srq_type)) { 887 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, 888 srq->ext.cq->res.id)) 889 goto err; 890 } 891 892 if (fill_res_srq_qps(msg, srq)) 893 goto err; 894 895 if (fill_res_name_pid(msg, res)) 896 goto err; 897 898 if (dev->ops.fill_res_srq_entry) 899 return dev->ops.fill_res_srq_entry(msg, srq); 900 901 return 0; 902 903 err: 904 return -EMSGSIZE; 905 } 906 907 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 908 struct rdma_restrack_entry *res, uint32_t port) 909 { 910 struct ib_srq *srq = container_of(res, struct ib_srq, res); 911 struct ib_device *dev = srq->device; 912 913 if (!dev->ops.fill_res_srq_entry_raw) 914 return -EINVAL; 915 return dev->ops.fill_res_srq_entry_raw(msg, srq); 916 } 917 918 static int fill_stat_counter_mode(struct sk_buff *msg, 919 struct rdma_counter *counter) 920 { 921 struct rdma_counter_mode *m = &counter->mode; 922 923 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) 924 return -EMSGSIZE; 925 926 if (m->mode == RDMA_COUNTER_MODE_AUTO) { 927 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && 928 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) 929 return -EMSGSIZE; 930 931 if ((m->mask & RDMA_COUNTER_MASK_PID) && 932 fill_res_name_pid(msg, &counter->res)) 933 return -EMSGSIZE; 934 } 935 936 return 0; 937 } 938 939 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn) 940 { 941 struct nlattr *entry_attr; 942 943 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); 944 if (!entry_attr) 945 return -EMSGSIZE; 946 947 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) 948 goto err; 949 950 nla_nest_end(msg, entry_attr); 951 return 0; 952 953 err: 954 nla_nest_cancel(msg, entry_attr); 955 return -EMSGSIZE; 956 } 957 958 static int fill_stat_counter_qps(struct sk_buff *msg, 959 struct rdma_counter *counter) 960 { 961 struct rdma_restrack_entry *res; 962 struct rdma_restrack_root *rt; 963 struct nlattr *table_attr; 964 struct ib_qp *qp = NULL; 965 unsigned long id = 0; 966 int ret = 0; 967 968 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); 969 if (!table_attr) 970 return -EMSGSIZE; 971 972 rt = &counter->device->res[RDMA_RESTRACK_QP]; 973 xa_lock(&rt->xa); 974 xa_for_each(&rt->xa, id, res) { 975 qp = container_of(res, struct ib_qp, res); 976 if (!qp->counter || (qp->counter->id != counter->id)) 977 continue; 978 979 ret = fill_stat_counter_qp_entry(msg, qp->qp_num); 980 if (ret) 981 goto err; 982 } 983 984 xa_unlock(&rt->xa); 985 nla_nest_end(msg, table_attr); 986 return 0; 987 988 err: 989 xa_unlock(&rt->xa); 990 nla_nest_cancel(msg, table_attr); 991 return ret; 992 } 993 994 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, 995 u64 value) 996 { 997 struct nlattr *entry_attr; 998 999 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 1000 if (!entry_attr) 1001 return -EMSGSIZE; 1002 1003 if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 1004 name)) 1005 goto err; 1006 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, 1007 value, RDMA_NLDEV_ATTR_PAD)) 1008 goto err; 1009 1010 nla_nest_end(msg, entry_attr); 1011 return 0; 1012 1013 err: 1014 nla_nest_cancel(msg, entry_attr); 1015 return -EMSGSIZE; 1016 } 1017 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); 1018 1019 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, 1020 struct rdma_restrack_entry *res, uint32_t port) 1021 { 1022 struct ib_mr *mr = container_of(res, struct ib_mr, res); 1023 struct ib_device *dev = mr->device; 1024 1025 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 1026 goto err; 1027 1028 if (dev->ops.fill_stat_mr_entry) 1029 return dev->ops.fill_stat_mr_entry(msg, mr); 1030 return 0; 1031 1032 err: 1033 return -EMSGSIZE; 1034 } 1035 1036 static int fill_stat_counter_hwcounters(struct sk_buff *msg, 1037 struct rdma_counter *counter) 1038 { 1039 struct rdma_hw_stats *st = counter->stats; 1040 struct nlattr *table_attr; 1041 int i; 1042 1043 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 1044 if (!table_attr) 1045 return -EMSGSIZE; 1046 1047 mutex_lock(&st->lock); 1048 for (i = 0; i < st->num_counters; i++) { 1049 if (test_bit(i, st->is_disabled)) 1050 continue; 1051 if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, 1052 st->value[i])) 1053 goto err; 1054 } 1055 mutex_unlock(&st->lock); 1056 1057 nla_nest_end(msg, table_attr); 1058 return 0; 1059 1060 err: 1061 mutex_unlock(&st->lock); 1062 nla_nest_cancel(msg, table_attr); 1063 return -EMSGSIZE; 1064 } 1065 1066 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, 1067 struct rdma_restrack_entry *res, 1068 uint32_t port) 1069 { 1070 struct rdma_counter *counter = 1071 container_of(res, struct rdma_counter, res); 1072 1073 if (port && port != counter->port) 1074 return -EAGAIN; 1075 1076 /* Dump it even query failed */ 1077 rdma_counter_query_stats(counter); 1078 1079 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || 1080 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || 1081 fill_stat_counter_mode(msg, counter) || 1082 fill_stat_counter_qps(msg, counter) || 1083 fill_stat_counter_hwcounters(msg, counter)) 1084 return -EMSGSIZE; 1085 1086 return 0; 1087 } 1088 1089 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1090 struct netlink_ext_ack *extack) 1091 { 1092 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1093 struct ib_device *device; 1094 struct sk_buff *msg; 1095 u32 index; 1096 int err; 1097 1098 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1099 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1100 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1101 return -EINVAL; 1102 1103 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1104 1105 device = ib_device_get_by_index(sock_net(skb->sk), index); 1106 if (!device) 1107 return -EINVAL; 1108 1109 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1110 if (!msg) { 1111 err = -ENOMEM; 1112 goto err; 1113 } 1114 1115 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1116 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1117 0, 0); 1118 if (!nlh) { 1119 err = -EMSGSIZE; 1120 goto err_free; 1121 } 1122 1123 err = fill_dev_info(msg, device); 1124 if (err) 1125 goto err_free; 1126 1127 nlmsg_end(msg, nlh); 1128 1129 ib_device_put(device); 1130 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1131 1132 err_free: 1133 nlmsg_free(msg); 1134 err: 1135 ib_device_put(device); 1136 return err; 1137 } 1138 1139 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1140 struct netlink_ext_ack *extack) 1141 { 1142 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1143 struct ib_device *device; 1144 u32 index; 1145 int err; 1146 1147 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1148 nldev_policy, extack); 1149 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1150 return -EINVAL; 1151 1152 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1153 device = ib_device_get_by_index(sock_net(skb->sk), index); 1154 if (!device) 1155 return -EINVAL; 1156 1157 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { 1158 char name[IB_DEVICE_NAME_MAX] = {}; 1159 1160 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1161 IB_DEVICE_NAME_MAX); 1162 if (strlen(name) == 0) { 1163 err = -EINVAL; 1164 goto done; 1165 } 1166 err = ib_device_rename(device, name); 1167 goto done; 1168 } 1169 1170 if (tb[RDMA_NLDEV_NET_NS_FD]) { 1171 u32 ns_fd; 1172 1173 ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); 1174 err = ib_device_set_netns_put(skb, device, ns_fd); 1175 goto put_done; 1176 } 1177 1178 if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) { 1179 u8 use_dim; 1180 1181 use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); 1182 err = ib_device_set_dim(device, use_dim); 1183 goto done; 1184 } 1185 1186 done: 1187 ib_device_put(device); 1188 put_done: 1189 return err; 1190 } 1191 1192 static int _nldev_get_dumpit(struct ib_device *device, 1193 struct sk_buff *skb, 1194 struct netlink_callback *cb, 1195 unsigned int idx) 1196 { 1197 int start = cb->args[0]; 1198 struct nlmsghdr *nlh; 1199 1200 if (idx < start) 1201 return 0; 1202 1203 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1204 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1205 0, NLM_F_MULTI); 1206 1207 if (!nlh || fill_dev_info(skb, device)) { 1208 nlmsg_cancel(skb, nlh); 1209 goto out; 1210 } 1211 1212 nlmsg_end(skb, nlh); 1213 1214 idx++; 1215 1216 out: cb->args[0] = idx; 1217 return skb->len; 1218 } 1219 1220 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 1221 { 1222 /* 1223 * There is no need to take lock, because 1224 * we are relying on ib_core's locking. 1225 */ 1226 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); 1227 } 1228 1229 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1230 struct netlink_ext_ack *extack) 1231 { 1232 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1233 struct ib_device *device; 1234 struct sk_buff *msg; 1235 u32 index; 1236 u32 port; 1237 int err; 1238 1239 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1240 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1241 if (err || 1242 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 1243 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 1244 return -EINVAL; 1245 1246 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1247 device = ib_device_get_by_index(sock_net(skb->sk), index); 1248 if (!device) 1249 return -EINVAL; 1250 1251 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1252 if (!rdma_is_port_valid(device, port)) { 1253 err = -EINVAL; 1254 goto err; 1255 } 1256 1257 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1258 if (!msg) { 1259 err = -ENOMEM; 1260 goto err; 1261 } 1262 1263 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1264 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 1265 0, 0); 1266 if (!nlh) { 1267 err = -EMSGSIZE; 1268 goto err_free; 1269 } 1270 1271 err = fill_port_info(msg, device, port, sock_net(skb->sk)); 1272 if (err) 1273 goto err_free; 1274 1275 nlmsg_end(msg, nlh); 1276 ib_device_put(device); 1277 1278 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1279 1280 err_free: 1281 nlmsg_free(msg); 1282 err: 1283 ib_device_put(device); 1284 return err; 1285 } 1286 1287 static int nldev_port_get_dumpit(struct sk_buff *skb, 1288 struct netlink_callback *cb) 1289 { 1290 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1291 struct ib_device *device; 1292 int start = cb->args[0]; 1293 struct nlmsghdr *nlh; 1294 u32 idx = 0; 1295 u32 ifindex; 1296 int err; 1297 unsigned int p; 1298 1299 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1300 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1301 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1302 return -EINVAL; 1303 1304 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1305 device = ib_device_get_by_index(sock_net(skb->sk), ifindex); 1306 if (!device) 1307 return -EINVAL; 1308 1309 rdma_for_each_port (device, p) { 1310 /* 1311 * The dumpit function returns all information from specific 1312 * index. This specific index is taken from the netlink 1313 * messages request sent by user and it is available 1314 * in cb->args[0]. 1315 * 1316 * Usually, the user doesn't fill this field and it causes 1317 * to return everything. 1318 * 1319 */ 1320 if (idx < start) { 1321 idx++; 1322 continue; 1323 } 1324 1325 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 1326 cb->nlh->nlmsg_seq, 1327 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1328 RDMA_NLDEV_CMD_PORT_GET), 1329 0, NLM_F_MULTI); 1330 1331 if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { 1332 nlmsg_cancel(skb, nlh); 1333 goto out; 1334 } 1335 idx++; 1336 nlmsg_end(skb, nlh); 1337 } 1338 1339 out: 1340 ib_device_put(device); 1341 cb->args[0] = idx; 1342 return skb->len; 1343 } 1344 1345 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1346 struct netlink_ext_ack *extack) 1347 { 1348 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1349 bool show_details = false; 1350 struct ib_device *device; 1351 struct sk_buff *msg; 1352 u32 index; 1353 int ret; 1354 1355 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1356 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1357 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1358 return -EINVAL; 1359 1360 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1361 device = ib_device_get_by_index(sock_net(skb->sk), index); 1362 if (!device) 1363 return -EINVAL; 1364 1365 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1366 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1367 1368 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1369 if (!msg) { 1370 ret = -ENOMEM; 1371 goto err; 1372 } 1373 1374 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1375 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1376 0, 0); 1377 if (!nlh) { 1378 ret = -EMSGSIZE; 1379 goto err_free; 1380 } 1381 1382 ret = fill_res_info(msg, device, show_details); 1383 if (ret) 1384 goto err_free; 1385 1386 nlmsg_end(msg, nlh); 1387 ib_device_put(device); 1388 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1389 1390 err_free: 1391 nlmsg_free(msg); 1392 err: 1393 ib_device_put(device); 1394 return ret; 1395 } 1396 1397 static int _nldev_res_get_dumpit(struct ib_device *device, 1398 struct sk_buff *skb, 1399 struct netlink_callback *cb, 1400 unsigned int idx) 1401 { 1402 int start = cb->args[0]; 1403 struct nlmsghdr *nlh; 1404 1405 if (idx < start) 1406 return 0; 1407 1408 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1409 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 1410 0, NLM_F_MULTI); 1411 1412 if (!nlh || fill_res_info(skb, device, false)) { 1413 nlmsg_cancel(skb, nlh); 1414 goto out; 1415 } 1416 nlmsg_end(skb, nlh); 1417 1418 idx++; 1419 1420 out: 1421 cb->args[0] = idx; 1422 return skb->len; 1423 } 1424 1425 static int nldev_res_get_dumpit(struct sk_buff *skb, 1426 struct netlink_callback *cb) 1427 { 1428 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); 1429 } 1430 1431 struct nldev_fill_res_entry { 1432 enum rdma_nldev_attr nldev_attr; 1433 u8 flags; 1434 u32 entry; 1435 u32 id; 1436 }; 1437 1438 enum nldev_res_flags { 1439 NLDEV_PER_DEV = 1 << 0, 1440 }; 1441 1442 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { 1443 [RDMA_RESTRACK_QP] = { 1444 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, 1445 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, 1446 .id = RDMA_NLDEV_ATTR_RES_LQPN, 1447 }, 1448 [RDMA_RESTRACK_CM_ID] = { 1449 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, 1450 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, 1451 .id = RDMA_NLDEV_ATTR_RES_CM_IDN, 1452 }, 1453 [RDMA_RESTRACK_CQ] = { 1454 .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, 1455 .flags = NLDEV_PER_DEV, 1456 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, 1457 .id = RDMA_NLDEV_ATTR_RES_CQN, 1458 }, 1459 [RDMA_RESTRACK_MR] = { 1460 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, 1461 .flags = NLDEV_PER_DEV, 1462 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, 1463 .id = RDMA_NLDEV_ATTR_RES_MRN, 1464 }, 1465 [RDMA_RESTRACK_PD] = { 1466 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, 1467 .flags = NLDEV_PER_DEV, 1468 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, 1469 .id = RDMA_NLDEV_ATTR_RES_PDN, 1470 }, 1471 [RDMA_RESTRACK_COUNTER] = { 1472 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, 1473 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, 1474 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, 1475 }, 1476 [RDMA_RESTRACK_CTX] = { 1477 .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX, 1478 .flags = NLDEV_PER_DEV, 1479 .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY, 1480 .id = RDMA_NLDEV_ATTR_RES_CTXN, 1481 }, 1482 [RDMA_RESTRACK_SRQ] = { 1483 .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ, 1484 .flags = NLDEV_PER_DEV, 1485 .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, 1486 .id = RDMA_NLDEV_ATTR_RES_SRQN, 1487 }, 1488 1489 }; 1490 1491 static noinline_for_stack int 1492 res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1493 struct netlink_ext_ack *extack, 1494 enum rdma_restrack_type res_type, 1495 res_fill_func_t fill_func) 1496 { 1497 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1498 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1499 struct rdma_restrack_entry *res; 1500 struct ib_device *device; 1501 u32 index, id, port = 0; 1502 bool has_cap_net_admin; 1503 struct sk_buff *msg; 1504 int ret; 1505 1506 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1507 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1508 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) 1509 return -EINVAL; 1510 1511 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1512 device = ib_device_get_by_index(sock_net(skb->sk), index); 1513 if (!device) 1514 return -EINVAL; 1515 1516 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1517 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1518 if (!rdma_is_port_valid(device, port)) { 1519 ret = -EINVAL; 1520 goto err; 1521 } 1522 } 1523 1524 if ((port && fe->flags & NLDEV_PER_DEV) || 1525 (!port && ~fe->flags & NLDEV_PER_DEV)) { 1526 ret = -EINVAL; 1527 goto err; 1528 } 1529 1530 id = nla_get_u32(tb[fe->id]); 1531 res = rdma_restrack_get_byid(device, res_type, id); 1532 if (IS_ERR(res)) { 1533 ret = PTR_ERR(res); 1534 goto err; 1535 } 1536 1537 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1538 if (!msg) { 1539 ret = -ENOMEM; 1540 goto err_get; 1541 } 1542 1543 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1544 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1545 RDMA_NL_GET_OP(nlh->nlmsg_type)), 1546 0, 0); 1547 1548 if (!nlh || fill_nldev_handle(msg, device)) { 1549 ret = -EMSGSIZE; 1550 goto err_free; 1551 } 1552 1553 has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); 1554 1555 ret = fill_func(msg, has_cap_net_admin, res, port); 1556 if (ret) 1557 goto err_free; 1558 1559 rdma_restrack_put(res); 1560 nlmsg_end(msg, nlh); 1561 ib_device_put(device); 1562 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1563 1564 err_free: 1565 nlmsg_free(msg); 1566 err_get: 1567 rdma_restrack_put(res); 1568 err: 1569 ib_device_put(device); 1570 return ret; 1571 } 1572 1573 static int res_get_common_dumpit(struct sk_buff *skb, 1574 struct netlink_callback *cb, 1575 enum rdma_restrack_type res_type, 1576 res_fill_func_t fill_func) 1577 { 1578 const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; 1579 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1580 struct rdma_restrack_entry *res; 1581 struct rdma_restrack_root *rt; 1582 int err, ret = 0, idx = 0; 1583 bool show_details = false; 1584 struct nlattr *table_attr; 1585 struct nlattr *entry_attr; 1586 struct ib_device *device; 1587 int start = cb->args[0]; 1588 bool has_cap_net_admin; 1589 struct nlmsghdr *nlh; 1590 unsigned long id; 1591 u32 index, port = 0; 1592 bool filled = false; 1593 1594 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1595 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 1596 /* 1597 * Right now, we are expecting the device index to get res information, 1598 * but it is possible to extend this code to return all devices in 1599 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. 1600 * if it doesn't exist, we will iterate over all devices. 1601 * 1602 * But it is not needed for now. 1603 */ 1604 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1605 return -EINVAL; 1606 1607 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1608 device = ib_device_get_by_index(sock_net(skb->sk), index); 1609 if (!device) 1610 return -EINVAL; 1611 1612 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 1613 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 1614 1615 /* 1616 * If no PORT_INDEX is supplied, we will return all QPs from that device 1617 */ 1618 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1619 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1620 if (!rdma_is_port_valid(device, port)) { 1621 ret = -EINVAL; 1622 goto err_index; 1623 } 1624 } 1625 1626 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1627 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1628 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 1629 0, NLM_F_MULTI); 1630 1631 if (!nlh || fill_nldev_handle(skb, device)) { 1632 ret = -EMSGSIZE; 1633 goto err; 1634 } 1635 1636 table_attr = nla_nest_start_noflag(skb, fe->nldev_attr); 1637 if (!table_attr) { 1638 ret = -EMSGSIZE; 1639 goto err; 1640 } 1641 1642 has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); 1643 1644 rt = &device->res[res_type]; 1645 xa_lock(&rt->xa); 1646 /* 1647 * FIXME: if the skip ahead is something common this loop should 1648 * use xas_for_each & xas_pause to optimize, we can have a lot of 1649 * objects. 1650 */ 1651 xa_for_each(&rt->xa, id, res) { 1652 if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) 1653 goto next; 1654 1655 if (idx < start || !rdma_restrack_get(res)) 1656 goto next; 1657 1658 xa_unlock(&rt->xa); 1659 1660 filled = true; 1661 1662 entry_attr = nla_nest_start_noflag(skb, fe->entry); 1663 if (!entry_attr) { 1664 ret = -EMSGSIZE; 1665 rdma_restrack_put(res); 1666 goto msg_full; 1667 } 1668 1669 ret = fill_func(skb, has_cap_net_admin, res, port); 1670 1671 rdma_restrack_put(res); 1672 1673 if (ret) { 1674 nla_nest_cancel(skb, entry_attr); 1675 if (ret == -EMSGSIZE) 1676 goto msg_full; 1677 if (ret == -EAGAIN) 1678 goto again; 1679 goto res_err; 1680 } 1681 nla_nest_end(skb, entry_attr); 1682 again: xa_lock(&rt->xa); 1683 next: idx++; 1684 } 1685 xa_unlock(&rt->xa); 1686 1687 msg_full: 1688 nla_nest_end(skb, table_attr); 1689 nlmsg_end(skb, nlh); 1690 cb->args[0] = idx; 1691 1692 /* 1693 * No more entries to fill, cancel the message and 1694 * return 0 to mark end of dumpit. 1695 */ 1696 if (!filled) 1697 goto err; 1698 1699 ib_device_put(device); 1700 return skb->len; 1701 1702 res_err: 1703 nla_nest_cancel(skb, table_attr); 1704 1705 err: 1706 nlmsg_cancel(skb, nlh); 1707 1708 err_index: 1709 ib_device_put(device); 1710 return ret; 1711 } 1712 1713 #define RES_GET_FUNCS(name, type) \ 1714 static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ 1715 struct netlink_callback *cb) \ 1716 { \ 1717 return res_get_common_dumpit(skb, cb, type, \ 1718 fill_res_##name##_entry); \ 1719 } \ 1720 static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ 1721 struct nlmsghdr *nlh, \ 1722 struct netlink_ext_ack *extack) \ 1723 { \ 1724 return res_get_common_doit(skb, nlh, extack, type, \ 1725 fill_res_##name##_entry); \ 1726 } 1727 1728 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); 1729 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); 1730 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); 1731 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); 1732 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); 1733 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); 1734 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); 1735 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); 1736 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); 1737 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX); 1738 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ); 1739 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ); 1740 1741 static LIST_HEAD(link_ops); 1742 static DECLARE_RWSEM(link_ops_rwsem); 1743 1744 static const struct rdma_link_ops *link_ops_get(const char *type) 1745 { 1746 const struct rdma_link_ops *ops; 1747 1748 list_for_each_entry(ops, &link_ops, list) { 1749 if (!strcmp(ops->type, type)) 1750 goto out; 1751 } 1752 ops = NULL; 1753 out: 1754 return ops; 1755 } 1756 1757 void rdma_link_register(struct rdma_link_ops *ops) 1758 { 1759 down_write(&link_ops_rwsem); 1760 if (WARN_ON_ONCE(link_ops_get(ops->type))) 1761 goto out; 1762 list_add(&ops->list, &link_ops); 1763 out: 1764 up_write(&link_ops_rwsem); 1765 } 1766 EXPORT_SYMBOL(rdma_link_register); 1767 1768 void rdma_link_unregister(struct rdma_link_ops *ops) 1769 { 1770 down_write(&link_ops_rwsem); 1771 list_del(&ops->list); 1772 up_write(&link_ops_rwsem); 1773 } 1774 EXPORT_SYMBOL(rdma_link_unregister); 1775 1776 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, 1777 struct netlink_ext_ack *extack) 1778 { 1779 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1780 char ibdev_name[IB_DEVICE_NAME_MAX]; 1781 const struct rdma_link_ops *ops; 1782 char ndev_name[IFNAMSIZ]; 1783 struct net_device *ndev; 1784 char type[IFNAMSIZ]; 1785 int err; 1786 1787 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1788 nldev_policy, extack); 1789 if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || 1790 !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) 1791 return -EINVAL; 1792 1793 nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1794 sizeof(ibdev_name)); 1795 if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) 1796 return -EINVAL; 1797 1798 nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); 1799 nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], 1800 sizeof(ndev_name)); 1801 1802 ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); 1803 if (!ndev) 1804 return -ENODEV; 1805 1806 down_read(&link_ops_rwsem); 1807 ops = link_ops_get(type); 1808 #ifdef CONFIG_MODULES 1809 if (!ops) { 1810 up_read(&link_ops_rwsem); 1811 request_module("rdma-link-%s", type); 1812 down_read(&link_ops_rwsem); 1813 ops = link_ops_get(type); 1814 } 1815 #endif 1816 err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL; 1817 up_read(&link_ops_rwsem); 1818 dev_put(ndev); 1819 1820 return err; 1821 } 1822 1823 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 1824 struct netlink_ext_ack *extack) 1825 { 1826 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1827 struct ib_device *device; 1828 u32 index; 1829 int err; 1830 1831 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1832 nldev_policy, extack); 1833 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1834 return -EINVAL; 1835 1836 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1837 device = ib_device_get_by_index(sock_net(skb->sk), index); 1838 if (!device) 1839 return -EINVAL; 1840 1841 if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { 1842 ib_device_put(device); 1843 return -EINVAL; 1844 } 1845 1846 /* 1847 * This path is triggered by the 'rdma link delete' administrative command. 1848 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here. 1849 * Note: iWARP driver does not implement .dellink, so this logic is 1850 * implicitly scoped to the driver supporting dynamic link deletion like RXE. 1851 */ 1852 if (device->link_ops && device->link_ops->dellink) { 1853 mutex_lock(&nldev_dellink_mutex); 1854 err = device->link_ops->dellink(device); 1855 mutex_unlock(&nldev_dellink_mutex); 1856 if (err) 1857 return err; 1858 } 1859 1860 ib_unregister_device_and_put(device); 1861 return 0; 1862 } 1863 1864 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, 1865 struct netlink_ext_ack *extack) 1866 { 1867 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1868 char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; 1869 struct ib_client_nl_info data = {}; 1870 struct ib_device *ibdev = NULL; 1871 struct sk_buff *msg; 1872 u32 index; 1873 int err; 1874 1875 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 1876 NL_VALIDATE_LIBERAL, extack); 1877 if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) 1878 return -EINVAL; 1879 1880 nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], 1881 sizeof(client_name)); 1882 1883 if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 1884 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1885 ibdev = ib_device_get_by_index(sock_net(skb->sk), index); 1886 if (!ibdev) 1887 return -EINVAL; 1888 1889 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1890 data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 1891 if (!rdma_is_port_valid(ibdev, data.port)) { 1892 err = -EINVAL; 1893 goto out_put; 1894 } 1895 } else { 1896 data.port = -1; 1897 } 1898 } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { 1899 return -EINVAL; 1900 } 1901 1902 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1903 if (!msg) { 1904 err = -ENOMEM; 1905 goto out_put; 1906 } 1907 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1908 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1909 RDMA_NLDEV_CMD_GET_CHARDEV), 1910 0, 0); 1911 if (!nlh) { 1912 err = -EMSGSIZE; 1913 goto out_nlmsg; 1914 } 1915 1916 data.nl_msg = msg; 1917 err = ib_get_client_nl_info(ibdev, client_name, &data); 1918 if (err) 1919 goto out_nlmsg; 1920 1921 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV, 1922 huge_encode_dev(data.cdev->devt), 1923 RDMA_NLDEV_ATTR_PAD); 1924 if (err) 1925 goto out_data; 1926 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, 1927 RDMA_NLDEV_ATTR_PAD); 1928 if (err) 1929 goto out_data; 1930 if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, 1931 dev_name(data.cdev))) { 1932 err = -EMSGSIZE; 1933 goto out_data; 1934 } 1935 1936 nlmsg_end(msg, nlh); 1937 put_device(data.cdev); 1938 if (ibdev) 1939 ib_device_put(ibdev); 1940 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 1941 1942 out_data: 1943 put_device(data.cdev); 1944 out_nlmsg: 1945 nlmsg_free(msg); 1946 out_put: 1947 if (ibdev) 1948 ib_device_put(ibdev); 1949 return err; 1950 } 1951 1952 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1953 struct netlink_ext_ack *extack) 1954 { 1955 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1956 struct sk_buff *msg; 1957 int err; 1958 1959 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1960 nldev_policy, NL_VALIDATE_LIBERAL, extack); 1961 if (err) 1962 return err; 1963 1964 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1965 if (!msg) 1966 return -ENOMEM; 1967 1968 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1969 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1970 RDMA_NLDEV_CMD_SYS_GET), 1971 0, 0); 1972 if (!nlh) { 1973 nlmsg_free(msg); 1974 return -EMSGSIZE; 1975 } 1976 1977 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, 1978 (u8)ib_devices_shared_netns); 1979 if (err) { 1980 nlmsg_free(msg); 1981 return err; 1982 } 1983 1984 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE, 1985 (u8)privileged_qkey); 1986 if (err) { 1987 nlmsg_free(msg); 1988 return err; 1989 } 1990 1991 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1); 1992 if (err) { 1993 nlmsg_free(msg); 1994 return err; 1995 } 1996 /* 1997 * Copy-on-fork is supported. 1998 * See commits: 1999 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") 2000 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm") 2001 * for more details. Don't backport this without them. 2002 * 2003 * Return value ignored on purpose, assume copy-on-fork is not 2004 * supported in case of failure. 2005 */ 2006 nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1); 2007 2008 nlmsg_end(msg, nlh); 2009 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2010 } 2011 2012 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[]) 2013 { 2014 u8 enable; 2015 int err; 2016 2017 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); 2018 /* Only 0 and 1 are supported */ 2019 if (enable > 1) 2020 return -EINVAL; 2021 2022 err = rdma_compatdev_set(enable); 2023 return err; 2024 } 2025 2026 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[]) 2027 { 2028 u8 enable; 2029 2030 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]); 2031 /* Only 0 and 1 are supported */ 2032 if (enable > 1) 2033 return -EINVAL; 2034 2035 privileged_qkey = enable; 2036 return 0; 2037 } 2038 2039 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2040 struct netlink_ext_ack *extack) 2041 { 2042 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2043 int err; 2044 2045 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2046 nldev_policy, extack); 2047 if (err) 2048 return -EINVAL; 2049 2050 if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) 2051 return nldev_set_sys_set_netns_doit(tb); 2052 2053 if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]) 2054 return nldev_set_sys_set_pqkey_doit(tb); 2055 2056 return -EINVAL; 2057 } 2058 2059 2060 static int nldev_stat_set_mode_doit(struct sk_buff *msg, 2061 struct netlink_ext_ack *extack, 2062 struct nlattr *tb[], 2063 struct ib_device *device, u32 port) 2064 { 2065 u32 mode, mask = 0, qpn, cntn = 0; 2066 bool opcnt = false; 2067 int ret; 2068 2069 /* Currently only counter for QP is supported */ 2070 if (!tb[RDMA_NLDEV_ATTR_STAT_RES] || 2071 nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2072 return -EINVAL; 2073 2074 if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]) 2075 opcnt = !!nla_get_u8( 2076 tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]); 2077 2078 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); 2079 if (mode == RDMA_COUNTER_MODE_AUTO) { 2080 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) 2081 mask = nla_get_u32( 2082 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); 2083 return rdma_counter_set_auto_mode(device, port, mask, opcnt, 2084 extack); 2085 } 2086 2087 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2088 return -EINVAL; 2089 2090 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2091 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { 2092 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2093 ret = rdma_counter_bind_qpn(device, port, qpn, cntn); 2094 if (ret) 2095 return ret; 2096 } else { 2097 ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); 2098 if (ret) 2099 return ret; 2100 } 2101 2102 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2103 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2104 ret = -EMSGSIZE; 2105 goto err_fill; 2106 } 2107 2108 return 0; 2109 2110 err_fill: 2111 rdma_counter_unbind_qpn(device, port, qpn, cntn); 2112 return ret; 2113 } 2114 2115 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], 2116 struct ib_device *device, 2117 u32 port) 2118 { 2119 struct rdma_hw_stats *stats; 2120 struct nlattr *entry_attr; 2121 unsigned long *target; 2122 int rem, i, ret = 0; 2123 u32 index; 2124 2125 stats = ib_get_hw_stats_port(device, port); 2126 if (!stats) 2127 return -EINVAL; 2128 2129 target = kcalloc(BITS_TO_LONGS(stats->num_counters), 2130 sizeof(*stats->is_disabled), GFP_KERNEL); 2131 if (!target) 2132 return -ENOMEM; 2133 2134 nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], 2135 rem) { 2136 index = nla_get_u32(entry_attr); 2137 if ((index >= stats->num_counters) || 2138 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { 2139 ret = -EINVAL; 2140 goto out; 2141 } 2142 2143 set_bit(index, target); 2144 } 2145 2146 for (i = 0; i < stats->num_counters; i++) { 2147 if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) 2148 continue; 2149 2150 ret = rdma_counter_modify(device, port, i, test_bit(i, target)); 2151 if (ret) 2152 goto out; 2153 } 2154 2155 out: 2156 kfree(target); 2157 return ret; 2158 } 2159 2160 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2161 struct netlink_ext_ack *extack) 2162 { 2163 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2164 struct ib_device *device; 2165 struct sk_buff *msg; 2166 u32 index, port; 2167 int ret; 2168 2169 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2170 extack); 2171 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2172 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2173 return -EINVAL; 2174 2175 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2176 device = ib_device_get_by_index(sock_net(skb->sk), index); 2177 if (!device) 2178 return -EINVAL; 2179 2180 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2181 if (!rdma_is_port_valid(device, port)) { 2182 ret = -EINVAL; 2183 goto err_put_device; 2184 } 2185 2186 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && 2187 !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2188 ret = -EINVAL; 2189 goto err_put_device; 2190 } 2191 2192 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2193 if (!msg) { 2194 ret = -ENOMEM; 2195 goto err_put_device; 2196 } 2197 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2198 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2199 RDMA_NLDEV_CMD_STAT_SET), 2200 0, 0); 2201 if (!nlh || fill_nldev_handle(msg, device) || 2202 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2203 ret = -EMSGSIZE; 2204 goto err_free_msg; 2205 } 2206 2207 if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { 2208 ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); 2209 if (ret) 2210 goto err_free_msg; 2211 } 2212 2213 if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 2214 ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); 2215 if (ret) 2216 goto err_free_msg; 2217 } 2218 2219 nlmsg_end(msg, nlh); 2220 ib_device_put(device); 2221 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2222 2223 err_free_msg: 2224 nlmsg_free(msg); 2225 err_put_device: 2226 ib_device_put(device); 2227 return ret; 2228 } 2229 2230 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2231 struct netlink_ext_ack *extack) 2232 { 2233 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2234 struct ib_device *device; 2235 struct sk_buff *msg; 2236 u32 index, port, qpn, cntn; 2237 int ret; 2238 2239 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2240 nldev_policy, extack); 2241 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || 2242 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || 2243 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] || 2244 !tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2245 return -EINVAL; 2246 2247 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 2248 return -EINVAL; 2249 2250 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2251 device = ib_device_get_by_index(sock_net(skb->sk), index); 2252 if (!device) 2253 return -EINVAL; 2254 2255 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2256 if (!rdma_is_port_valid(device, port)) { 2257 ret = -EINVAL; 2258 goto err; 2259 } 2260 2261 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2262 if (!msg) { 2263 ret = -ENOMEM; 2264 goto err; 2265 } 2266 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2267 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2268 RDMA_NLDEV_CMD_STAT_SET), 2269 0, 0); 2270 if (!nlh) { 2271 ret = -EMSGSIZE; 2272 goto err_fill; 2273 } 2274 2275 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2276 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2277 if (fill_nldev_handle(msg, device) || 2278 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2279 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2280 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2281 ret = -EMSGSIZE; 2282 goto err_fill; 2283 } 2284 2285 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); 2286 if (ret) 2287 goto err_fill; 2288 2289 nlmsg_end(msg, nlh); 2290 ib_device_put(device); 2291 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2292 2293 err_fill: 2294 nlmsg_free(msg); 2295 err: 2296 ib_device_put(device); 2297 return ret; 2298 } 2299 2300 static noinline_for_stack int 2301 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, 2302 struct netlink_ext_ack *extack, 2303 struct nlattr *tb[]) 2304 { 2305 struct rdma_hw_stats *stats; 2306 struct nlattr *table_attr; 2307 struct ib_device *device; 2308 int ret, num_cnts, i; 2309 struct sk_buff *msg; 2310 u32 index, port; 2311 u64 v; 2312 2313 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2314 return -EINVAL; 2315 2316 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2317 device = ib_device_get_by_index(sock_net(skb->sk), index); 2318 if (!device) 2319 return -EINVAL; 2320 2321 if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { 2322 ret = -EINVAL; 2323 goto err; 2324 } 2325 2326 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2327 stats = ib_get_hw_stats_port(device, port); 2328 if (!stats) { 2329 ret = -EINVAL; 2330 goto err; 2331 } 2332 2333 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2334 if (!msg) { 2335 ret = -ENOMEM; 2336 goto err; 2337 } 2338 2339 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2340 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2341 RDMA_NLDEV_CMD_STAT_GET), 2342 0, 0); 2343 2344 if (!nlh || fill_nldev_handle(msg, device) || 2345 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 2346 ret = -EMSGSIZE; 2347 goto err_msg; 2348 } 2349 2350 mutex_lock(&stats->lock); 2351 2352 num_cnts = device->ops.get_hw_stats(device, stats, port, 0); 2353 if (num_cnts < 0) { 2354 ret = -EINVAL; 2355 goto err_stats; 2356 } 2357 2358 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2359 if (!table_attr) { 2360 ret = -EMSGSIZE; 2361 goto err_stats; 2362 } 2363 for (i = 0; i < num_cnts; i++) { 2364 if (test_bit(i, stats->is_disabled)) 2365 continue; 2366 2367 v = stats->value[i] + 2368 rdma_counter_get_hwstat_value(device, port, i); 2369 if (rdma_nl_stat_hwcounter_entry(msg, 2370 stats->descs[i].name, v)) { 2371 ret = -EMSGSIZE; 2372 goto err_table; 2373 } 2374 } 2375 nla_nest_end(msg, table_attr); 2376 2377 mutex_unlock(&stats->lock); 2378 nlmsg_end(msg, nlh); 2379 ib_device_put(device); 2380 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2381 2382 err_table: 2383 nla_nest_cancel(msg, table_attr); 2384 err_stats: 2385 mutex_unlock(&stats->lock); 2386 err_msg: 2387 nlmsg_free(msg); 2388 err: 2389 ib_device_put(device); 2390 return ret; 2391 } 2392 2393 static noinline_for_stack int 2394 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, 2395 struct netlink_ext_ack *extack, struct nlattr *tb[]) 2396 2397 { 2398 static enum rdma_nl_counter_mode mode; 2399 static enum rdma_nl_counter_mask mask; 2400 struct ib_device *device; 2401 struct sk_buff *msg; 2402 u32 index, port; 2403 bool opcnt; 2404 int ret; 2405 2406 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) 2407 return nldev_res_get_counter_doit(skb, nlh, extack); 2408 2409 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] || 2410 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2411 return -EINVAL; 2412 2413 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2414 device = ib_device_get_by_index(sock_net(skb->sk), index); 2415 if (!device) 2416 return -EINVAL; 2417 2418 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2419 if (!rdma_is_port_valid(device, port)) { 2420 ret = -EINVAL; 2421 goto err; 2422 } 2423 2424 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2425 if (!msg) { 2426 ret = -ENOMEM; 2427 goto err; 2428 } 2429 2430 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2431 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2432 RDMA_NLDEV_CMD_STAT_GET), 2433 0, 0); 2434 if (!nlh) { 2435 ret = -EMSGSIZE; 2436 goto err_msg; 2437 } 2438 2439 ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt); 2440 if (ret) 2441 goto err_msg; 2442 2443 if (fill_nldev_handle(msg, device) || 2444 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2445 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) { 2446 ret = -EMSGSIZE; 2447 goto err_msg; 2448 } 2449 2450 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2451 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) { 2452 ret = -EMSGSIZE; 2453 goto err_msg; 2454 } 2455 2456 if ((mode == RDMA_COUNTER_MODE_AUTO) && 2457 nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) { 2458 ret = -EMSGSIZE; 2459 goto err_msg; 2460 } 2461 2462 nlmsg_end(msg, nlh); 2463 ib_device_put(device); 2464 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2465 2466 err_msg: 2467 nlmsg_free(msg); 2468 err: 2469 ib_device_put(device); 2470 return ret; 2471 } 2472 2473 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2474 struct netlink_ext_ack *extack) 2475 { 2476 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2477 int ret; 2478 2479 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2480 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2481 if (ret) 2482 return -EINVAL; 2483 2484 if (!tb[RDMA_NLDEV_ATTR_STAT_RES]) 2485 return stat_get_doit_default_counter(skb, nlh, extack, tb); 2486 2487 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2488 case RDMA_NLDEV_ATTR_RES_QP: 2489 ret = stat_get_doit_qp(skb, nlh, extack, tb); 2490 break; 2491 case RDMA_NLDEV_ATTR_RES_MR: 2492 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, 2493 fill_stat_mr_entry); 2494 break; 2495 default: 2496 ret = -EINVAL; 2497 break; 2498 } 2499 2500 return ret; 2501 } 2502 2503 static int nldev_stat_get_dumpit(struct sk_buff *skb, 2504 struct netlink_callback *cb) 2505 { 2506 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2507 int ret; 2508 2509 ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2510 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2511 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) 2512 return -EINVAL; 2513 2514 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { 2515 case RDMA_NLDEV_ATTR_RES_QP: 2516 ret = nldev_res_get_counter_dumpit(skb, cb); 2517 break; 2518 case RDMA_NLDEV_ATTR_RES_MR: 2519 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, 2520 fill_stat_mr_entry); 2521 break; 2522 default: 2523 ret = -EINVAL; 2524 break; 2525 } 2526 2527 return ret; 2528 } 2529 2530 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 2531 struct nlmsghdr *nlh, 2532 struct netlink_ext_ack *extack) 2533 { 2534 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; 2535 struct rdma_hw_stats *stats; 2536 struct ib_device *device; 2537 struct sk_buff *msg; 2538 u32 devid, port; 2539 int ret, i; 2540 2541 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2542 nldev_policy, NL_VALIDATE_LIBERAL, extack); 2543 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2544 !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2545 return -EINVAL; 2546 2547 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2548 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2549 if (!device) 2550 return -EINVAL; 2551 2552 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2553 if (!rdma_is_port_valid(device, port)) { 2554 ret = -EINVAL; 2555 goto err; 2556 } 2557 2558 stats = ib_get_hw_stats_port(device, port); 2559 if (!stats) { 2560 ret = -EINVAL; 2561 goto err; 2562 } 2563 2564 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2565 if (!msg) { 2566 ret = -ENOMEM; 2567 goto err; 2568 } 2569 2570 nlh = nlmsg_put( 2571 msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2572 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), 2573 0, 0); 2574 2575 ret = -EMSGSIZE; 2576 if (!nlh || fill_nldev_handle(msg, device) || 2577 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 2578 goto err_msg; 2579 2580 table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2581 if (!table) 2582 goto err_msg; 2583 2584 mutex_lock(&stats->lock); 2585 for (i = 0; i < stats->num_counters; i++) { 2586 entry = nla_nest_start(msg, 2587 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 2588 if (!entry) 2589 goto err_msg_table; 2590 2591 if (nla_put_string(msg, 2592 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 2593 stats->descs[i].name) || 2594 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) 2595 goto err_msg_entry; 2596 2597 if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && 2598 (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, 2599 !test_bit(i, stats->is_disabled)))) 2600 goto err_msg_entry; 2601 2602 nla_nest_end(msg, entry); 2603 } 2604 mutex_unlock(&stats->lock); 2605 2606 nla_nest_end(msg, table); 2607 nlmsg_end(msg, nlh); 2608 ib_device_put(device); 2609 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2610 2611 err_msg_entry: 2612 nla_nest_cancel(msg, entry); 2613 err_msg_table: 2614 mutex_unlock(&stats->lock); 2615 nla_nest_cancel(msg, table); 2616 err_msg: 2617 nlmsg_free(msg); 2618 err: 2619 ib_device_put(device); 2620 return ret; 2621 } 2622 2623 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh, 2624 struct netlink_ext_ack *extack) 2625 { 2626 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2627 enum rdma_nl_dev_type type; 2628 struct ib_device *parent; 2629 char name[IFNAMSIZ] = {}; 2630 u32 parentid; 2631 int ret; 2632 2633 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2634 nldev_policy, extack); 2635 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2636 !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE]) 2637 return -EINVAL; 2638 2639 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name)); 2640 type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]); 2641 parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2642 parent = ib_device_get_by_index(sock_net(skb->sk), parentid); 2643 if (!parent) 2644 return -EINVAL; 2645 2646 ret = ib_add_sub_device(parent, type, name); 2647 ib_device_put(parent); 2648 2649 return ret; 2650 } 2651 2652 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, 2653 struct netlink_ext_ack *extack) 2654 { 2655 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2656 struct ib_device *device; 2657 u32 devid; 2658 int ret; 2659 2660 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2661 nldev_policy, extack); 2662 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2663 return -EINVAL; 2664 2665 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2666 device = ib_device_get_by_index(sock_net(skb->sk), devid); 2667 if (!device) 2668 return -EINVAL; 2669 2670 return ib_del_sub_device_and_put(device); 2671 } 2672 2673 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key) 2674 { 2675 struct nlattr *key_attr; 2676 2677 key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY); 2678 if (!key_attr) 2679 return -EMSGSIZE; 2680 2681 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats)) 2682 goto err; 2683 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS, 2684 key->access_flags)) 2685 goto err; 2686 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY, 2687 key->vendor_key, RDMA_NLDEV_ATTR_PAD)) 2688 goto err; 2689 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS, 2690 key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD)) 2691 goto err; 2692 2693 if (key->kernel_vendor_key && 2694 nla_put_u64_64bit(msg, 2695 RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY, 2696 key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD)) 2697 goto err; 2698 2699 nla_nest_end(msg, key_attr); 2700 return 0; 2701 2702 err: 2703 return -EMSGSIZE; 2704 } 2705 2706 static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool) 2707 { 2708 if (fill_frmr_pool_key(msg, &pool->key)) 2709 return -EMSGSIZE; 2710 2711 spin_lock(&pool->lock); 2712 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES, 2713 pool->queue.ci + pool->inactive_queue.ci)) 2714 goto err_unlock; 2715 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE, 2716 pool->max_in_use, RDMA_NLDEV_ATTR_PAD)) 2717 goto err_unlock; 2718 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE, 2719 pool->in_use, RDMA_NLDEV_ATTR_PAD)) 2720 goto err_unlock; 2721 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES, 2722 pool->pinned_handles)) 2723 goto err_unlock; 2724 spin_unlock(&pool->lock); 2725 2726 return 0; 2727 2728 err_unlock: 2729 spin_unlock(&pool->lock); 2730 return -EMSGSIZE; 2731 } 2732 2733 static int nldev_frmr_pools_parse_key(struct nlattr *tb[], 2734 struct ib_frmr_key *key, 2735 struct netlink_ext_ack *extack) 2736 { 2737 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]) 2738 key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]); 2739 2740 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]) 2741 key->access_flags = nla_get_u32( 2742 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]); 2743 2744 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]) 2745 key->vendor_key = nla_get_u64( 2746 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]); 2747 2748 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]) 2749 key->num_dma_blocks = nla_get_u64( 2750 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]); 2751 2752 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY]) 2753 return -EINVAL; 2754 2755 return 0; 2756 } 2757 2758 static int nldev_frmr_pools_set_pinned(struct ib_device *device, 2759 struct nlattr *tb[], 2760 struct netlink_ext_ack *extack) 2761 { 2762 struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX]; 2763 struct ib_frmr_key key = { 0 }; 2764 u32 pinned_handles = 0; 2765 int err = 0; 2766 2767 pinned_handles = 2768 nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]); 2769 2770 if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]) 2771 return -EINVAL; 2772 2773 err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1, 2774 tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy, 2775 extack); 2776 if (err) 2777 return err; 2778 2779 err = nldev_frmr_pools_parse_key(key_tb, &key, extack); 2780 if (err) 2781 return err; 2782 2783 err = ib_frmr_pools_set_pinned(device, &key, pinned_handles); 2784 2785 return err; 2786 } 2787 2788 static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb, 2789 struct netlink_callback *cb) 2790 { 2791 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 2792 struct ib_frmr_pools *pools; 2793 int err, ret = 0, idx = 0; 2794 struct ib_frmr_pool *pool; 2795 struct nlattr *table_attr; 2796 struct nlattr *entry_attr; 2797 bool show_details = false; 2798 struct ib_device *device; 2799 int start = cb->args[0]; 2800 struct rb_node *node; 2801 struct nlmsghdr *nlh; 2802 bool filled = false; 2803 2804 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2805 nldev_policy, NL_VALIDATE_LIBERAL, NULL); 2806 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 2807 return -EINVAL; 2808 2809 device = ib_device_get_by_index( 2810 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2811 if (!device) 2812 return -EINVAL; 2813 2814 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) 2815 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); 2816 2817 pools = device->frmr_pools; 2818 if (!pools) { 2819 ib_device_put(device); 2820 return 0; 2821 } 2822 2823 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2824 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2825 RDMA_NLDEV_CMD_FRMR_POOLS_GET), 2826 0, NLM_F_MULTI); 2827 2828 if (!nlh || fill_nldev_handle(skb, device)) { 2829 ret = -EMSGSIZE; 2830 goto err; 2831 } 2832 2833 table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS); 2834 if (!table_attr) { 2835 ret = -EMSGSIZE; 2836 goto err; 2837 } 2838 2839 read_lock(&pools->rb_lock); 2840 for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) { 2841 pool = rb_entry(node, struct ib_frmr_pool, node); 2842 if (pool->key.kernel_vendor_key && !show_details) 2843 continue; 2844 2845 if (idx < start) { 2846 idx++; 2847 continue; 2848 } 2849 2850 filled = true; 2851 2852 entry_attr = nla_nest_start_noflag( 2853 skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY); 2854 if (!entry_attr) { 2855 ret = -EMSGSIZE; 2856 goto end_msg; 2857 } 2858 2859 if (fill_frmr_pool_entry(skb, pool)) { 2860 nla_nest_cancel(skb, entry_attr); 2861 ret = -EMSGSIZE; 2862 goto end_msg; 2863 } 2864 2865 nla_nest_end(skb, entry_attr); 2866 idx++; 2867 } 2868 end_msg: 2869 read_unlock(&pools->rb_lock); 2870 2871 nla_nest_end(skb, table_attr); 2872 nlmsg_end(skb, nlh); 2873 cb->args[0] = idx; 2874 2875 /* 2876 * No more entries to fill, cancel the message and 2877 * return 0 to mark end of dumpit. 2878 */ 2879 if (!filled) 2880 goto err; 2881 2882 ib_device_put(device); 2883 return skb->len; 2884 2885 err: 2886 nlmsg_cancel(skb, nlh); 2887 ib_device_put(device); 2888 return ret; 2889 } 2890 2891 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 2892 struct netlink_ext_ack *extack) 2893 { 2894 struct ib_device *device; 2895 struct nlattr **tb; 2896 u32 aging_period; 2897 int err; 2898 2899 tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL); 2900 if (!tb) 2901 return -ENOMEM; 2902 2903 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 2904 extack); 2905 if (err) 2906 goto free_tb; 2907 2908 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { 2909 err = -EINVAL; 2910 goto free_tb; 2911 } 2912 2913 device = ib_device_get_by_index( 2914 sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); 2915 if (!device) { 2916 err = -EINVAL; 2917 goto free_tb; 2918 } 2919 2920 if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) { 2921 aging_period = nla_get_u32( 2922 tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]); 2923 err = ib_frmr_pools_set_aging_period(device, aging_period); 2924 goto done; 2925 } 2926 2927 if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]) 2928 err = nldev_frmr_pools_set_pinned(device, tb, extack); 2929 2930 done: 2931 ib_device_put(device); 2932 free_tb: 2933 kfree(tb); 2934 return err; 2935 } 2936 2937 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 2938 [RDMA_NLDEV_CMD_GET] = { 2939 .doit = nldev_get_doit, 2940 .dump = nldev_get_dumpit, 2941 }, 2942 [RDMA_NLDEV_CMD_GET_CHARDEV] = { 2943 .doit = nldev_get_chardev, 2944 }, 2945 [RDMA_NLDEV_CMD_SET] = { 2946 .doit = nldev_set_doit, 2947 .flags = RDMA_NL_ADMIN_PERM, 2948 }, 2949 [RDMA_NLDEV_CMD_NEWLINK] = { 2950 .doit = nldev_newlink, 2951 .flags = RDMA_NL_ADMIN_PERM, 2952 }, 2953 [RDMA_NLDEV_CMD_DELLINK] = { 2954 .doit = nldev_dellink, 2955 .flags = RDMA_NL_ADMIN_PERM, 2956 }, 2957 [RDMA_NLDEV_CMD_PORT_GET] = { 2958 .doit = nldev_port_get_doit, 2959 .dump = nldev_port_get_dumpit, 2960 }, 2961 [RDMA_NLDEV_CMD_RES_GET] = { 2962 .doit = nldev_res_get_doit, 2963 .dump = nldev_res_get_dumpit, 2964 }, 2965 [RDMA_NLDEV_CMD_RES_QP_GET] = { 2966 .doit = nldev_res_get_qp_doit, 2967 .dump = nldev_res_get_qp_dumpit, 2968 }, 2969 [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { 2970 .doit = nldev_res_get_cm_id_doit, 2971 .dump = nldev_res_get_cm_id_dumpit, 2972 }, 2973 [RDMA_NLDEV_CMD_RES_CQ_GET] = { 2974 .doit = nldev_res_get_cq_doit, 2975 .dump = nldev_res_get_cq_dumpit, 2976 }, 2977 [RDMA_NLDEV_CMD_RES_MR_GET] = { 2978 .doit = nldev_res_get_mr_doit, 2979 .dump = nldev_res_get_mr_dumpit, 2980 }, 2981 [RDMA_NLDEV_CMD_RES_PD_GET] = { 2982 .doit = nldev_res_get_pd_doit, 2983 .dump = nldev_res_get_pd_dumpit, 2984 }, 2985 [RDMA_NLDEV_CMD_RES_CTX_GET] = { 2986 .doit = nldev_res_get_ctx_doit, 2987 .dump = nldev_res_get_ctx_dumpit, 2988 }, 2989 [RDMA_NLDEV_CMD_RES_SRQ_GET] = { 2990 .doit = nldev_res_get_srq_doit, 2991 .dump = nldev_res_get_srq_dumpit, 2992 }, 2993 [RDMA_NLDEV_CMD_SYS_GET] = { 2994 .doit = nldev_sys_get_doit, 2995 }, 2996 [RDMA_NLDEV_CMD_SYS_SET] = { 2997 .doit = nldev_set_sys_set_doit, 2998 .flags = RDMA_NL_ADMIN_PERM, 2999 }, 3000 [RDMA_NLDEV_CMD_STAT_SET] = { 3001 .doit = nldev_stat_set_doit, 3002 .flags = RDMA_NL_ADMIN_PERM, 3003 }, 3004 [RDMA_NLDEV_CMD_STAT_GET] = { 3005 .doit = nldev_stat_get_doit, 3006 .dump = nldev_stat_get_dumpit, 3007 }, 3008 [RDMA_NLDEV_CMD_STAT_DEL] = { 3009 .doit = nldev_stat_del_doit, 3010 .flags = RDMA_NL_ADMIN_PERM, 3011 }, 3012 [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { 3013 .doit = nldev_res_get_qp_raw_doit, 3014 .dump = nldev_res_get_qp_raw_dumpit, 3015 .flags = RDMA_NL_ADMIN_PERM, 3016 }, 3017 [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { 3018 .doit = nldev_res_get_cq_raw_doit, 3019 .dump = nldev_res_get_cq_raw_dumpit, 3020 .flags = RDMA_NL_ADMIN_PERM, 3021 }, 3022 [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { 3023 .doit = nldev_res_get_mr_raw_doit, 3024 .dump = nldev_res_get_mr_raw_dumpit, 3025 .flags = RDMA_NL_ADMIN_PERM, 3026 }, 3027 [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = { 3028 .doit = nldev_res_get_srq_raw_doit, 3029 .dump = nldev_res_get_srq_raw_dumpit, 3030 .flags = RDMA_NL_ADMIN_PERM, 3031 }, 3032 [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { 3033 .doit = nldev_stat_get_counter_status_doit, 3034 }, 3035 [RDMA_NLDEV_CMD_NEWDEV] = { 3036 .doit = nldev_newdev, 3037 .flags = RDMA_NL_ADMIN_PERM, 3038 }, 3039 [RDMA_NLDEV_CMD_DELDEV] = { 3040 .doit = nldev_deldev, 3041 .flags = RDMA_NL_ADMIN_PERM, 3042 }, 3043 [RDMA_NLDEV_CMD_FRMR_POOLS_GET] = { 3044 .dump = nldev_frmr_pools_get_dumpit, 3045 }, 3046 [RDMA_NLDEV_CMD_FRMR_POOLS_SET] = { 3047 .doit = nldev_frmr_pools_set_doit, 3048 .flags = RDMA_NL_ADMIN_PERM, 3049 }, 3050 }; 3051 3052 static int fill_mon_netdev_rename(struct sk_buff *msg, 3053 struct ib_device *device, u32 port, 3054 const struct net *net) 3055 { 3056 struct net_device *netdev = ib_device_get_netdev(device, port); 3057 int ret = 0; 3058 3059 if (!netdev || !net_eq(dev_net(netdev), net)) 3060 goto out; 3061 3062 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3063 if (ret) 3064 goto out; 3065 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3066 out: 3067 dev_put(netdev); 3068 return ret; 3069 } 3070 3071 static int fill_mon_netdev_association(struct sk_buff *msg, 3072 struct ib_device *device, u32 port, 3073 const struct net *net) 3074 { 3075 struct net_device *netdev = ib_device_get_netdev(device, port); 3076 int ret = 0; 3077 3078 if (netdev && !net_eq(dev_net(netdev), net)) 3079 goto out; 3080 3081 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); 3082 if (ret) 3083 goto out; 3084 3085 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 3086 dev_name(&device->dev)); 3087 if (ret) 3088 goto out; 3089 3090 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); 3091 if (ret) 3092 goto out; 3093 3094 if (netdev) { 3095 ret = nla_put_u32(msg, 3096 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 3097 if (ret) 3098 goto out; 3099 3100 ret = nla_put_string(msg, 3101 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 3102 } 3103 3104 out: 3105 dev_put(netdev); 3106 return ret; 3107 } 3108 3109 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, 3110 enum rdma_nl_notify_event_type type) 3111 { 3112 struct net_device *netdev; 3113 3114 switch (type) { 3115 case RDMA_REGISTER_EVENT: 3116 dev_warn_ratelimited(&device->dev, 3117 "Failed to send RDMA monitor register device event\n"); 3118 break; 3119 case RDMA_UNREGISTER_EVENT: 3120 dev_warn_ratelimited(&device->dev, 3121 "Failed to send RDMA monitor unregister device event\n"); 3122 break; 3123 case RDMA_NETDEV_ATTACH_EVENT: 3124 netdev = ib_device_get_netdev(device, port_num); 3125 dev_warn_ratelimited(&device->dev, 3126 "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", 3127 port_num, netdev->ifindex); 3128 dev_put(netdev); 3129 break; 3130 case RDMA_NETDEV_DETACH_EVENT: 3131 dev_warn_ratelimited(&device->dev, 3132 "Failed to send RDMA monitor netdev detach event: port %d\n", 3133 port_num); 3134 break; 3135 case RDMA_RENAME_EVENT: 3136 dev_warn_ratelimited(&device->dev, 3137 "Failed to send RDMA monitor rename device event\n"); 3138 break; 3139 3140 case RDMA_NETDEV_RENAME_EVENT: 3141 netdev = ib_device_get_netdev(device, port_num); 3142 dev_warn_ratelimited(&device->dev, 3143 "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", 3144 port_num, netdev->ifindex); 3145 dev_put(netdev); 3146 break; 3147 default: 3148 break; 3149 } 3150 } 3151 3152 int rdma_nl_notify_event(struct ib_device *device, u32 port_num, 3153 enum rdma_nl_notify_event_type type) 3154 { 3155 struct sk_buff *skb; 3156 int ret = -EMSGSIZE; 3157 struct net *net; 3158 void *nlh; 3159 3160 net = read_pnet(&device->coredev.rdma_net); 3161 if (!net) 3162 return -EINVAL; 3163 3164 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3165 if (!skb) 3166 return -ENOMEM; 3167 nlh = nlmsg_put(skb, 0, 0, 3168 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), 3169 0, 0); 3170 if (!nlh) 3171 goto err_free; 3172 3173 switch (type) { 3174 case RDMA_REGISTER_EVENT: 3175 case RDMA_UNREGISTER_EVENT: 3176 case RDMA_RENAME_EVENT: 3177 ret = fill_nldev_handle(skb, device); 3178 if (ret) 3179 goto err_free; 3180 break; 3181 case RDMA_NETDEV_ATTACH_EVENT: 3182 case RDMA_NETDEV_DETACH_EVENT: 3183 ret = fill_mon_netdev_association(skb, device, port_num, net); 3184 if (ret) 3185 goto err_free; 3186 break; 3187 case RDMA_NETDEV_RENAME_EVENT: 3188 ret = fill_mon_netdev_rename(skb, device, port_num, net); 3189 if (ret) 3190 goto err_free; 3191 break; 3192 default: 3193 break; 3194 } 3195 3196 ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); 3197 if (ret) 3198 goto err_free; 3199 3200 nlmsg_end(skb, nlh); 3201 ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); 3202 if (ret && ret != -ESRCH) { 3203 skb = NULL; /* skb is freed in the netlink send-op handling */ 3204 goto err_free; 3205 } 3206 return 0; 3207 3208 err_free: 3209 rdma_nl_notify_err_msg(device, port_num, type); 3210 nlmsg_free(skb); 3211 return ret; 3212 } 3213 3214 void __init nldev_init(void) 3215 { 3216 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); 3217 } 3218 3219 void nldev_exit(void) 3220 { 3221 rdma_nl_unregister(RDMA_NL_NLDEV); 3222 } 3223 3224 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5); 3225