1 /*
2 * Copyright (c) 2007 The Regents of the University of California.
3 * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
5 * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved.
6 * Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38 /*
39 * Abstract:
40 * Implementation of osm_perfmgr_t.
41 * This object implements an IBA performance manager.
42 *
43 * Author:
44 * Ira Weiny, LLNL
45 */
46
47 #if HAVE_CONFIG_H
48 # include <config.h>
49 #endif /* HAVE_CONFIG_H */
50
51 #ifdef ENABLE_OSM_PERF_MGR
52 #include <stdlib.h>
53 #include <stdint.h>
54 #include <string.h>
55 #include <poll.h>
56 #include <errno.h>
57 #include <sys/time.h>
58 #include <netinet/in.h>
59 #include <float.h>
60 #include <arpa/inet.h>
61 #include <sys/socket.h>
62 #include <iba/ib_types.h>
63 #include <complib/cl_debug.h>
64 #include <complib/cl_thread.h>
65 #include <opensm/osm_file_ids.h>
66 #define FILE_ID OSM_FILE_PERFMGR_C
67 #include <vendor/osm_vendor_api.h>
68 #include <opensm/osm_perfmgr.h>
69 #include <opensm/osm_log.h>
70 #include <opensm/osm_node.h>
71 #include <opensm/osm_opensm.h>
72 #include <opensm/osm_helper.h>
73
74 #define PERFMGR_INITIAL_TID_VALUE 0xcafe
75
76 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
/*
 * Timing statistics accumulated by update_mad_stats() and reset by
 * clear_mad_stats().  All times are in microseconds.
 */
struct {
	double fastest_us;	/* smallest sample recorded so far */
	double slowest_us;	/* largest sample recorded so far */
	double avg_us;		/* running mean of the recorded samples */
	uint64_t num;		/* number of samples recorded */
} perfmgr_mad_stats = {
	/* C99 designated initializers (the GNU "field:" form is obsolete) */
	.fastest_us = DBL_MAX,
	.slowest_us = DBL_MIN,
	.avg_us = 0,
	.num = 0
};
84
85 /* diff must be something which can fit in a susecond_t */
update_mad_stats(struct timeval * diff)86 static inline void update_mad_stats(struct timeval *diff)
87 {
88 double new = (diff->tv_sec * 1000000) + diff->tv_usec;
89 if (new < perfmgr_mad_stats.fastest_us)
90 perfmgr_mad_stats.fastest_us = new;
91 if (new > perfmgr_mad_stats.slowest_us)
92 perfmgr_mad_stats.slowest_us = new;
93
94 perfmgr_mad_stats.avg_us =
95 ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new)
96 / (perfmgr_mad_stats.num + 1);
97 perfmgr_mad_stats.num++;
98 }
99
clear_mad_stats(void)100 static inline void clear_mad_stats(void)
101 {
102 perfmgr_mad_stats.fastest_us = DBL_MAX;
103 perfmgr_mad_stats.slowest_us = DBL_MIN;
104 perfmgr_mad_stats.avg_us = 0;
105 perfmgr_mad_stats.num = 0;
106 }
107
/*
 * Compute *diff = *after - *before, keeping tv_usec in the normalized
 * range by borrowing one second when needed.
 *
 * after and diff may point to the same struct: both inputs are fully
 * read before anything is written to *diff.
 */
static inline void diff_time(struct timeval *before, struct timeval *after,
			     struct timeval *diff)
{
	struct timeval delta;

	delta.tv_sec = after->tv_sec - before->tv_sec;
	delta.tv_usec = after->tv_usec - before->tv_usec;

	/* borrow a second when the microsecond part went negative */
	if (delta.tv_usec < 0) {
		delta.tv_sec--;
		delta.tv_usec += 1000000;
	}

	diff->tv_sec = delta.tv_sec;
	diff->tv_usec = delta.tv_usec;
}
120 #endif
121
122 /**********************************************************************
123 * Internal helper functions
124 **********************************************************************/
/*
 * Set up the monitored-node bookkeeping held in pm: initialize
 * monitored_map, clear remove_list, and construct and initialize the
 * sig_query event.
 */
static void init_monitored_nodes(osm_perfmgr_t * pm)
{
	pm->remove_list = NULL;
	cl_qmap_init(&pm->monitored_map);

	cl_event_construct(&pm->sig_query);
	cl_event_init(&pm->sig_query, FALSE);
}
132
/*
 * Push node onto the front of pm->remove_list.  The list is drained by
 * remove_marked_nodes().
 */
static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * node)
{
	/*
	 * An empty list has a NULL head, so linking to the current head
	 * covers both the empty and the non-empty case.
	 */
	node->next = pm->remove_list;
	pm->remove_list = node;
}
143
/*
 * Drain pm->remove_list.  Each listed node is removed from
 * monitored_map, then either deleted from the database (pm->rm_nodes
 * set) or marked inactive there, and finally its strings and the node
 * itself are freed.
 */
static void remove_marked_nodes(osm_perfmgr_t * pm)
{
	monitored_node_t *victim;

	while ((victim = pm->remove_list) != NULL) {
		monitored_node_t *rest = victim->next;
		int port;

		cl_qmap_remove_item(&pm->monitored_map,
				    (cl_map_item_t *) victim);

		if (pm->rm_nodes)
			perfmgr_db_delete_entry(pm->db, victim->guid);
		else
			perfmgr_db_mark_active(pm->db, victim->guid, FALSE);

		/* free(NULL) is a no-op, so no guards are needed */
		free(victim->name);

		for (port = victim->esp0 ? 0 : 1;
		     port < victim->num_ports; port++)
			free(victim->port[port].remote_name);

		free(victim);
		pm->remove_list = rest;
	}
}
172
/*
 * Account for one completed (or failed) query.
 *
 * Decrements pm->outstanding_queries.  If the counter then reads zero
 * and the sweep state is PERFMGR_SWEEP_POST_PROCESSING, the state is
 * moved to PERFMGR_SWEEP_SLEEP under pm->lock.  sig_query is signalled
 * in every case.
 */
static inline void decrement_outstanding_queries(osm_perfmgr_t * pm)
{
	cl_atomic_dec(&pm->outstanding_queries);

	if (pm->outstanding_queries == 0) {
		cl_spinlock_acquire(&pm->lock);
		if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
			pm->sweep_state = PERFMGR_SWEEP_SLEEP;
			OSM_LOG(pm->log, OSM_LOG_INFO,
				"PM sweep state exiting Post Processing\n");
		}
		cl_spinlock_release(&pm->lock);
	}

	cl_event_signal(&pm->sig_query);
}
189
190 /**********************************************************************
191 * Receive the MAD from the vendor layer and post it for processing by
192 * the dispatcher
193 **********************************************************************/
perfmgr_mad_recv_callback(osm_madw_t * p_madw,void * bind_context,osm_madw_t * p_req_madw)194 static void perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
195 osm_madw_t * p_req_madw)
196 {
197 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
198
199 OSM_LOG_ENTER(pm->log);
200
201 CL_ASSERT(p_madw);
202 CL_ASSERT(p_req_madw != NULL);
203
204 osm_madw_copy_context(p_madw, p_req_madw);
205 osm_mad_pool_put(pm->mad_pool, p_req_madw);
206
207 decrement_outstanding_queries(pm);
208
209 /* post this message for later processing. */
210 if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS,
211 p_madw, NULL, NULL) != CL_SUCCESS) {
212 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5401: "
213 "PerfMgr Dispatcher post failed\n");
214 osm_mad_pool_put(pm->mad_pool, p_madw);
215 }
216 OSM_LOG_EXIT(pm->log);
217 }
218
219 /**********************************************************************
220 * Process MAD send errors
221 **********************************************************************/
perfmgr_mad_send_err_callback(void * bind_context,osm_madw_t * p_madw)222 static void perfmgr_mad_send_err_callback(void *bind_context,
223 osm_madw_t * p_madw)
224 {
225 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
226 osm_madw_context_t *context = &p_madw->context;
227 uint64_t node_guid = context->perfmgr_context.node_guid;
228 uint8_t port = context->perfmgr_context.port;
229 cl_map_item_t *p_node;
230 monitored_node_t *p_mon_node;
231 ib_net16_t orig_lid;
232
233 OSM_LOG_ENTER(pm->log);
234
235 /*
236 * get the monitored node struct to have the printable name
237 * for log messages
238 */
239 if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
240 cl_qmap_end(&pm->monitored_map)) {
241 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016"
242 PRIx64 " not found in monitored map\n", node_guid);
243 goto Exit;
244 }
245 p_mon_node = (monitored_node_t *) p_node;
246
247 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64
248 ") port %u LID %u TID 0x%" PRIx64 "\n",
249 p_mon_node->name, p_mon_node->guid, port,
250 cl_ntoh16(p_madw->mad_addr.dest_lid),
251 cl_ntoh64(p_madw->p_mad->trans_id));
252
253 if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
254 /* First, find the node in the monitored map */
255 cl_plock_acquire(&pm->osm->lock);
256 /* Now, validate port number */
257 if (port >= p_mon_node->num_ports) {
258 cl_plock_release(&pm->osm->lock);
259 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: "
260 "Invalid port num %u for %s (GUID 0x%016"
261 PRIx64 ") num ports %u\n", port,
262 p_mon_node->name, p_mon_node->guid,
263 p_mon_node->num_ports);
264 goto Exit;
265 }
266 /* Clear redirection info for this port except orig_lid */
267 orig_lid = p_mon_node->port[port].orig_lid;
268 memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
269 p_mon_node->port[port].orig_lid = orig_lid;
270 p_mon_node->port[port].valid = TRUE;
271 cl_plock_release(&pm->osm->lock);
272 }
273
274 Exit:
275 osm_mad_pool_put(pm->mad_pool, p_madw);
276
277 decrement_outstanding_queries(pm);
278
279 OSM_LOG_EXIT(pm->log);
280 }
281
282 /**********************************************************************
283 * Bind the PerfMgr to the vendor layer for MAD sends/receives
284 **********************************************************************/
osm_perfmgr_bind(osm_perfmgr_t * pm,ib_net64_t port_guid)285 ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
286 {
287 osm_bind_info_t bind_info;
288 ib_api_status_t status = IB_SUCCESS;
289
290 OSM_LOG_ENTER(pm->log);
291
292 if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) {
293 OSM_LOG(pm->log, OSM_LOG_ERROR,
294 "ERR 5403: Multiple binds not allowed\n");
295 status = IB_ERROR;
296 goto Exit;
297 }
298
299 bind_info.port_guid = pm->port_guid = port_guid;
300 bind_info.mad_class = IB_MCLASS_PERF;
301 bind_info.class_version = 1;
302 bind_info.is_responder = FALSE;
303 bind_info.is_report_processor = FALSE;
304 bind_info.is_trap_processor = FALSE;
305 bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
306 bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
307 bind_info.timeout = pm->subn->opt.transaction_timeout;
308 bind_info.retries = pm->subn->opt.transaction_retries;
309
310 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
311 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
312
313 pm->bind_handle = osm_vendor_bind(pm->vendor, &bind_info, pm->mad_pool,
314 perfmgr_mad_recv_callback,
315 perfmgr_mad_send_err_callback, pm);
316
317 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
318 status = IB_ERROR;
319 OSM_LOG(pm->log, OSM_LOG_ERROR,
320 "ERR 5404: Vendor specific bind failed (%s)\n",
321 ib_get_err_str(status));
322 }
323
324 Exit:
325 OSM_LOG_EXIT(pm->log);
326 return status;
327 }
328
329 /**********************************************************************
330 * Unbind the PerfMgr from the vendor layer for MAD sends/receives
331 **********************************************************************/
perfmgr_mad_unbind(osm_perfmgr_t * pm)332 static void perfmgr_mad_unbind(osm_perfmgr_t * pm)
333 {
334 OSM_LOG_ENTER(pm->log);
335 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
336 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n");
337 goto Exit;
338 }
339 osm_vendor_unbind(pm->bind_handle);
340 Exit:
341 OSM_LOG_EXIT(pm->log);
342 }
343
344 /**********************************************************************
345 * Given a monitored node and a port, return the qp
346 **********************************************************************/
get_qp(monitored_node_t * mon_node,uint8_t port)347 static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
348 {
349 ib_net32_t qp = IB_QP1;
350
351 if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
352 mon_node->port[port].redirection && mon_node->port[port].qp)
353 qp = mon_node->port[port].qp;
354
355 return qp;
356 }
357
get_base_lid(osm_node_t * p_node,uint8_t port)358 static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
359 {
360 switch (p_node->node_info.node_type) {
361 case IB_NODE_TYPE_CA:
362 case IB_NODE_TYPE_ROUTER:
363 return osm_node_get_base_lid(p_node, port);
364 case IB_NODE_TYPE_SWITCH:
365 return osm_node_get_base_lid(p_node, 0);
366 default:
367 return 0;
368 }
369 }
370
371 /**********************************************************************
372 * Given a node, a port, and an optional monitored node,
373 * return the lid appropriate to query that port
374 **********************************************************************/
get_lid(osm_node_t * p_node,uint8_t port,monitored_node_t * mon_node)375 static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
376 monitored_node_t * mon_node)
377 {
378 if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
379 mon_node->port[port].lid)
380 return mon_node->port[port].lid;
381
382 return get_base_lid(p_node, port);
383 }
384
385 /**********************************************************************
386 * Build a Performance Management class MAD
387 **********************************************************************/
perfmgr_build_mad(osm_perfmgr_t * perfmgr,ib_net16_t dest_lid,uint8_t sl,ib_net32_t dest_qp,uint16_t pkey_ix,uint8_t mad_method,ib_net16_t attr_id,osm_madw_context_t * p_context,ib_perfmgt_mad_t ** p_pm_mad)388 static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
389 ib_net16_t dest_lid,
390 uint8_t sl,
391 ib_net32_t dest_qp,
392 uint16_t pkey_ix,
393 uint8_t mad_method,
394 ib_net16_t attr_id,
395 osm_madw_context_t * p_context,
396 ib_perfmgt_mad_t ** p_pm_mad)
397 {
398 ib_perfmgt_mad_t *pm_mad = NULL;
399 osm_madw_t *p_madw = NULL;
400
401 OSM_LOG_ENTER(perfmgr->log);
402
403 p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
404 MAD_BLOCK_SIZE, NULL);
405 if (p_madw == NULL)
406 return NULL;
407
408 pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
409
410 /* build the mad */
411 pm_mad->header.base_ver = 1;
412 pm_mad->header.mgmt_class = IB_MCLASS_PERF;
413 pm_mad->header.class_ver = 1;
414 pm_mad->header.method = mad_method;
415 pm_mad->header.status = 0;
416 pm_mad->header.class_spec = 0;
417 pm_mad->header.trans_id =
418 cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
419 (uint64_t) (0xFFFFFFFF));
420 if (perfmgr->trans_id == 0)
421 pm_mad->header.trans_id =
422 cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
423 (uint64_t) (0xFFFFFFFF));
424 pm_mad->header.attr_id = attr_id;
425 pm_mad->header.resv = 0;
426 pm_mad->header.attr_mod = 0;
427
428 p_madw->mad_addr.dest_lid = dest_lid;
429 p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
430 p_madw->mad_addr.addr_type.gsi.remote_qkey =
431 cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
432 p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
433 p_madw->mad_addr.addr_type.gsi.service_level = sl;
434 p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
435 p_madw->resp_expected = TRUE;
436
437 if (p_context)
438 p_madw->context = *p_context;
439
440 if (p_pm_mad)
441 *p_pm_mad = pm_mad;
442
443 OSM_LOG_EXIT(perfmgr->log);
444
445 return (p_madw);
446 }
447
448 /**********************************************************************
449 * Send a Performance Management class MAD
450 **********************************************************************/
perfmgr_send_mad(osm_perfmgr_t * perfmgr,osm_madw_t * const p_madw)451 static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
452 osm_madw_t * const p_madw)
453 {
454 cl_status_t sts;
455 ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
456 TRUE);
457 if (status == IB_SUCCESS) {
458 /* pause thread if there are too many outstanding requests */
459 cl_atomic_inc(&(perfmgr->outstanding_queries));
460 while (perfmgr->outstanding_queries >
461 (int32_t)perfmgr->max_outstanding_queries) {
462 cl_spinlock_acquire(&perfmgr->lock);
463 if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) {
464 perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING;
465 OSM_LOG(perfmgr->log, OSM_LOG_INFO,
466 "PM sweep state going into Post Processing\n");
467 } else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE)
468 perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED;
469 cl_spinlock_release(&perfmgr->lock);
470 wait:
471 sts = cl_event_wait_on(&perfmgr->sig_query,
472 EVENT_NO_TIMEOUT, TRUE);
473 if (sts != CL_SUCCESS)
474 goto wait;
475
476 cl_spinlock_acquire(&perfmgr->lock);
477 if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED)
478 perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
479 cl_spinlock_release(&perfmgr->lock);
480 }
481 }
482 return (status);
483 }
484
485
486 /**********************************************************************
487 * Form and send the PortCounters MAD for a single port
488 **********************************************************************/
perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,ib_net16_t dest_lid,ib_net32_t dest_qp,uint16_t pkey_ix,uint8_t port,uint8_t mad_method,uint16_t counter_select,uint8_t counter_select2,osm_madw_context_t * p_context,uint8_t sl)489 static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
490 ib_net16_t dest_lid,
491 ib_net32_t dest_qp, uint16_t pkey_ix,
492 uint8_t port, uint8_t mad_method,
493 uint16_t counter_select,
494 uint8_t counter_select2,
495 osm_madw_context_t * p_context,
496 uint8_t sl)
497 {
498 ib_api_status_t status = IB_SUCCESS;
499 ib_port_counters_t *port_counter = NULL;
500 ib_perfmgt_mad_t *pm_mad = NULL;
501 osm_madw_t *p_madw = NULL;
502
503 OSM_LOG_ENTER(perfmgr->log);
504
505 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS;
506 p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
507 mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
508 &pm_mad);
509 if (p_madw == NULL)
510 return IB_INSUFFICIENT_MEMORY;
511
512 port_counter = (ib_port_counters_t *) & pm_mad->data;
513 memset(port_counter, 0, sizeof(*port_counter));
514 port_counter->port_select = port;
515 port_counter->counter_select = cl_hton16(counter_select);
516 port_counter->counter_select2 = counter_select2;
517
518 status = perfmgr_send_mad(perfmgr, p_madw);
519
520 OSM_LOG_EXIT(perfmgr->log);
521 return status;
522 }
523
524 /**********************************************************************
525 * sweep the node_guid_tbl and collect the node guids to be tracked
526 **********************************************************************/
collect_guids(cl_map_item_t * p_map_item,void * context)527 static void collect_guids(cl_map_item_t * p_map_item, void *context)
528 {
529 osm_node_t *node = (osm_node_t *) p_map_item;
530 uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
531 osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
532 monitored_node_t *mon_node = NULL;
533 uint32_t num_ports;
534 int port;
535
536 OSM_LOG_ENTER(pm->log);
537
538 if (cl_qmap_get(&pm->monitored_map, node_guid) ==
539 cl_qmap_end(&pm->monitored_map)) {
540
541 if (pm->ignore_cas &&
542 (node->node_info.node_type == IB_NODE_TYPE_CA))
543 goto Exit;
544
545 /* if not already in map add it */
546 num_ports = osm_node_get_num_physp(node);
547 mon_node = malloc(sizeof(*mon_node) +
548 sizeof(monitored_port_t) * num_ports);
549 if (!mon_node) {
550 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: "
551 "malloc failed: not handling node %s"
552 "(GUID 0x%" PRIx64 ")\n", node->print_desc,
553 node_guid);
554 goto Exit;
555 }
556 memset(mon_node, 0,
557 sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports);
558 mon_node->guid = node_guid;
559 mon_node->name = strdup(node->print_desc);
560 mon_node->num_ports = num_ports;
561 mon_node->node_type = node->node_info.node_type;
562 /* check for enhanced switch port 0 */
563 mon_node->esp0 = (node->sw &&
564 ib_switch_info_is_enhanced_port0(&node->sw->
565 switch_info));
566 for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
567 monitored_port_t *mon_port = &mon_node->port[port];
568 osm_physp_t *p_physp = &node->physp_table[port];
569 osm_physp_t *p_remote_physp = p_physp->p_remote_physp;
570
571 mon_port->orig_lid = 0;
572 mon_port->valid = FALSE;
573 if (osm_physp_is_valid(p_physp)) {
574 mon_port->orig_lid = get_base_lid(node, port);
575 mon_port->valid = TRUE;
576 }
577 mon_port->remote_valid = FALSE;
578 mon_port->remote_name = NULL;
579 if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) {
580 osm_node_t *p_remote_node = p_remote_physp->p_node;
581 mon_port->remote_valid = TRUE;
582 mon_port->remote_guid = p_remote_node->node_info.node_guid;
583 mon_port->remote_name = strdup(p_remote_node->print_desc);
584 mon_port->remote_port = p_remote_physp->port_num;
585 }
586 }
587
588 cl_qmap_insert(&pm->monitored_map, node_guid,
589 (cl_map_item_t *) mon_node);
590 }
591
592 Exit:
593 OSM_LOG_EXIT(pm->log);
594 }
595
596 /**********************************************************************
597 * Form and send the ClassPortInfo MAD for a single port
598 **********************************************************************/
perfmgr_send_cpi_mad(osm_perfmgr_t * pm,ib_net16_t dest_lid,ib_net32_t dest_qp,uint16_t pkey_ix,uint8_t port,osm_madw_context_t * p_context,uint8_t sl)599 static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
600 ib_net16_t dest_lid,
601 ib_net32_t dest_qp,
602 uint16_t pkey_ix,
603 uint8_t port,
604 osm_madw_context_t * p_context,
605 uint8_t sl)
606 {
607 ib_api_status_t status = IB_SUCCESS;
608 osm_madw_t *p_madw = NULL;
609
610 OSM_LOG_ENTER(pm->log);
611
612 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO;
613 p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
614 pkey_ix, IB_MAD_METHOD_GET,
615 IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
616 NULL);
617 if (p_madw == NULL)
618 return IB_INSUFFICIENT_MEMORY;
619
620 status = perfmgr_send_mad(pm, p_madw);
621
622 OSM_LOG_EXIT(pm->log);
623 return status;
624 }
625
626 /**********************************************************************
627 * return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported
628 **********************************************************************/
pce_supported(monitored_node_t * mon_node,uint8_t port)629 static inline boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port)
630 {
631 monitored_port_t *mon_port = &(mon_node->port[port]);
632 return (mon_port->cpi_valid
633 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED
634 || mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP));
635 }
636
637 /**********************************************************************
638 * return if CapMask.PortCountersXmitWaitSupported is set
639 **********************************************************************/
xmit_wait_supported(monitored_node_t * mon_node,uint8_t port)640 static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port)
641 {
642 monitored_port_t *mon_port = &(mon_node->port[port]);
643 return (mon_port->cpi_valid
644 && (mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP));
645 }
646
647 /**********************************************************************
648 * return if "full" PortCountersExtended (IETF) is indicated
649 **********************************************************************/
ietf_supported(monitored_node_t * mon_node,uint8_t port)650 static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port)
651 {
652 monitored_port_t *mon_port = &(mon_node->port[port]);
653 return (mon_port->cpi_valid
654 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED));
655 }
656
657 /**********************************************************************
658 * Form and send the PortCountersExtended MAD for a single port
659 **********************************************************************/
perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr,ib_net16_t dest_lid,ib_net32_t dest_qp,uint16_t pkey_ix,uint8_t port,uint8_t mad_method,osm_madw_context_t * p_context,uint8_t sl)660 static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr,
661 ib_net16_t dest_lid,
662 ib_net32_t dest_qp,
663 uint16_t pkey_ix,
664 uint8_t port, uint8_t mad_method,
665 osm_madw_context_t * p_context,
666 uint8_t sl)
667 {
668 ib_api_status_t status = IB_SUCCESS;
669 ib_port_counters_ext_t *port_counter_ext = NULL;
670 ib_perfmgt_mad_t *pm_mad = NULL;
671 osm_madw_t *p_madw = NULL;
672
673 OSM_LOG_ENTER(perfmgr->log);
674
675 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT;
676 p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
677 mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context,
678 &pm_mad);
679 if (p_madw == NULL)
680 return IB_INSUFFICIENT_MEMORY;
681
682 port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data;
683 memset(port_counter_ext, 0, sizeof(*port_counter_ext));
684 port_counter_ext->port_select = port;
685 port_counter_ext->counter_select = cl_hton16(0x00FF);
686
687 status = perfmgr_send_mad(perfmgr, p_madw);
688
689 OSM_LOG_EXIT(perfmgr->log);
690 return status;
691 }
692
693 /**********************************************************************
694 * query the Port Counters of all the nodes in the subnet
695 **********************************************************************/
perfmgr_query_counters(cl_map_item_t * p_map_item,void * context)696 static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
697 {
698 ib_api_status_t status = IB_SUCCESS;
699 osm_perfmgr_t *pm = context;
700 osm_node_t *node = NULL;
701 monitored_node_t *mon_node = (monitored_node_t *) p_map_item;
702 osm_madw_context_t mad_context;
703 uint64_t node_guid = 0;
704 ib_net32_t remote_qp;
705 uint8_t port, num_ports = 0;
706
707 OSM_LOG_ENTER(pm->log);
708
709 cl_plock_acquire(&pm->osm->lock);
710 node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
711 if (!node) {
712 OSM_LOG(pm->log, OSM_LOG_ERROR,
713 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64
714 ") no longer exists so removing from PerfMgr monitoring\n",
715 mon_node->name, mon_node->guid);
716 mark_for_removal(pm, mon_node);
717 goto Exit;
718 }
719
720 num_ports = osm_node_get_num_physp(node);
721 node_guid = cl_ntoh64(node->node_info.node_guid);
722
723 /* make sure there is a database object ready to store this info */
724 if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0,
725 num_ports, node->print_desc) !=
726 PERFMGR_EVENT_DB_SUCCESS) {
727 OSM_LOG(pm->log, OSM_LOG_ERROR,
728 "ERR 5408: DB create entry failed for 0x%"
729 PRIx64 " (%s) : %s\n", node_guid, node->print_desc,
730 strerror(errno));
731 goto Exit;
732 }
733
734 perfmgr_db_mark_active(pm->db, node_guid, TRUE);
735
736 /* issue the query for each port */
737 for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
738 ib_net16_t lid;
739
740 if (!osm_node_get_physp_ptr(node, port))
741 continue;
742
743 if (!mon_node->port[port].valid)
744 continue;
745
746 lid = get_lid(node, port, mon_node);
747 if (lid == 0) {
748 OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
749 " port %d (%s): port out of range, skipping\n",
750 cl_ntoh64(node->node_info.node_guid), port,
751 node->print_desc);
752 continue;
753 }
754
755 remote_qp = get_qp(mon_node, port);
756
757 mad_context.perfmgr_context.node_guid = node_guid;
758 mad_context.perfmgr_context.port = port;
759 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
760
761 if (pm->query_cpi && !mon_node->port[port].cpi_valid) {
762 status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
763 mon_node->port[port].pkey_ix,
764 port, &mad_context,
765 0); /* FIXME SL != 0 */
766 if (status != IB_SUCCESS)
767 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
768 "Failed to issue ClassPortInfo query "
769 "for node 0x%" PRIx64
770 " port %d (%s)\n",
771 node->node_info.node_guid, port,
772 node->print_desc);
773 if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
774 goto Exit; /* only need to issue 1 CPI query
775 for switches */
776 } else {
777
778 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
779 gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
780 #endif
781 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
782 PRIx64 " port %d (lid %u) (%s)\n",
783 node_guid, port, cl_ntoh16(lid),
784 node->print_desc);
785 status = perfmgr_send_pc_mad(pm, lid, remote_qp,
786 mon_node->port[port].pkey_ix,
787 port, IB_MAD_METHOD_GET,
788 0xffff,
789 1,
790 &mad_context,
791 0); /* FIXME SL != 0 */
792 if (status != IB_SUCCESS)
793 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
794 "Failed to issue port counter query for node 0x%"
795 PRIx64 " port %d (%s)\n",
796 node->node_info.node_guid, port,
797 node->print_desc);
798
799 if (pce_supported(mon_node, port)) {
800
801 #if ENABLE_OSM_PERF_MGR_PROFILE
802 gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
803 #endif
804 status = perfmgr_send_pce_mad(pm, lid, remote_qp,
805 mon_node->port[port].pkey_ix,
806 port,
807 IB_MAD_METHOD_GET,
808 &mad_context,
809 0); /* FIXME SL != 0 */
810 if (status != IB_SUCCESS)
811 OSM_LOG(pm->log, OSM_LOG_ERROR,
812 "ERR 5417: Failed to issue "
813 "port counter query for "
814 "node 0x%" PRIx64 " port "
815 "%d (%s)\n",
816 node->node_info.node_guid,
817 port,
818 node->print_desc);
819 }
820 }
821 }
822 Exit:
823 cl_plock_release(&pm->osm->lock);
824 OSM_LOG_EXIT(pm->log);
825 }
826
827 /**********************************************************************
828 * Discovery stuff
829 * This code should not be here, but merged with main OpenSM
830 **********************************************************************/
831 extern int wait_for_pending_transactions(osm_stats_t * stats);
832 extern void osm_drop_mgr_process(IN osm_sm_t * sm);
833
sweep_hop_1(osm_sm_t * sm)834 static int sweep_hop_1(osm_sm_t * sm)
835 {
836 ib_api_status_t status = IB_SUCCESS;
837 osm_madw_context_t context;
838 osm_node_t *p_node;
839 osm_port_t *p_port;
840 osm_dr_path_t hop_1_path;
841 ib_net64_t port_guid;
842 uint8_t port_num;
843 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
844 uint8_t num_ports;
845 osm_physp_t *p_ext_physp;
846
847 port_guid = sm->p_subn->sm_port_guid;
848
849 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
850 if (!p_port) {
851 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
852 "ERR 5481: No SM port object\n");
853 return -1;
854 }
855
856 p_node = p_port->p_node;
857 port_num = ib_node_info_get_local_port_num(&p_node->node_info);
858
859 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
860 "Probing hop 1 on local port %u\n", port_num);
861
862 memset(path_array, 0, sizeof(path_array));
863 /* the hop_1 operations depend on the type of our node.
864 * Currently - legal nodes that can host SM are SW and CA */
865 switch (osm_node_get_type(p_node)) {
866 case IB_NODE_TYPE_CA:
867 case IB_NODE_TYPE_ROUTER:
868 memset(&context, 0, sizeof(context));
869 context.ni_context.node_guid = osm_node_get_node_guid(p_node);
870 context.ni_context.port_num = port_num;
871
872 path_array[1] = port_num;
873
874 osm_dr_path_init(&hop_1_path, 1, path_array);
875 CL_PLOCK_ACQUIRE(sm->p_lock);
876 status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
877 TRUE, 0, CL_DISP_MSGID_NONE, &context);
878 CL_PLOCK_RELEASE(sm->p_lock);
879
880 if (status != IB_SUCCESS)
881 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: "
882 "Request for NodeInfo failed\n");
883 break;
884
885 case IB_NODE_TYPE_SWITCH:
886 /* Need to go over all the ports of the switch, and send a node_info
887 * from them. This doesn't include the port 0 of the switch, which
888 * hosts the SM.
889 * Note: We'll send another switchInfo on port 0, since if no ports
890 * are connected, we still want to get some response, and have the
891 * subnet come up.
892 */
893 num_ports = osm_node_get_num_physp(p_node);
894 for (port_num = 0; port_num < num_ports; port_num++) {
895 /* go through the port only if the port is not DOWN */
896 p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
897 if (!p_ext_physp || ib_port_info_get_port_state
898 (&p_ext_physp->port_info) <= IB_LINK_DOWN)
899 continue;
900
901 memset(&context, 0, sizeof(context));
902 context.ni_context.node_guid =
903 osm_node_get_node_guid(p_node);
904 context.ni_context.port_num = port_num;
905
906 path_array[1] = port_num;
907
908 osm_dr_path_init(&hop_1_path, 1, path_array);
909 CL_PLOCK_ACQUIRE(sm->p_lock);
910 status = osm_req_get(sm, &hop_1_path,
911 IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0,
912 CL_DISP_MSGID_NONE, &context);
913 CL_PLOCK_RELEASE(sm->p_lock);
914
915 if (status != IB_SUCCESS)
916 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: "
917 "Request for NodeInfo failed\n");
918 }
919 break;
920
921 default:
922 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
923 "ERR 5483: Unknown node type %d\n",
924 osm_node_get_type(p_node));
925 }
926
927 return status;
928 }
929
/*
 * Tell whether the SM's own port should be treated as down.
 *
 * Returns 1 when no SM port GUID is set or when no port object exists
 * for that GUID, 0 when the SM sits on a switch whose port 0 is not an
 * enhanced port 0 (base SP0), and otherwise the result of comparing the
 * physical port state with IB_LINK_DOWN.
 */
static unsigned is_sm_port_down(osm_sm_t * sm)
{
	const ib_net64_t sm_guid = sm->p_subn->sm_port_guid;
	osm_port_t *p_sm_port;

	if (sm_guid == 0)
		return 1;

	/* only the lookup itself is done with the lock held */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_sm_port = osm_get_port_by_guid(sm->p_subn, sm_guid);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (!p_sm_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: "
			"SM port with GUID:%016" PRIx64 " is unknown\n",
			cl_ntoh64(sm_guid));
		return 1;
	}

	if (p_sm_port->p_node->sw) {
		if (!ib_switch_info_is_enhanced_port0
		    (&p_sm_port->p_node->sw->switch_info))
			return 0;	/* base SP0 */
	}

	return osm_physp_get_port_state(p_sm_port->p_physp) == IB_LINK_DOWN;
}
956
/*
 * Send a NodeInfo Get over a zero-hop directed route, i.e. to the
 * node the SM itself is bound to.
 *
 * Returns -1 when the SM MAD controller has no valid bind handle,
 * otherwise the status returned by osm_req_get().
 */
static int sweep_hop_0(osm_sm_t * sm)
{
	uint8_t hops[IB_SUBNET_PATH_HOPS_MAX];
	osm_dr_path_t local_path;
	ib_api_status_t rc;

	if (osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl) ==
	    OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n");
		return -1;
	}

	/* hop count 0 with an all-zero path array */
	memset(hops, 0, sizeof(hops));
	osm_dr_path_init(&local_path, 0, hops);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	rc = osm_req_get(sm, &local_path, IB_MAD_ATTR_NODE_INFO, 0,
			 TRUE, 0, CL_DISP_MSGID_NONE, NULL);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (rc != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 5486: Request for NodeInfo failed\n");

	return rc;
}
984
/*
 * cl_qmap_apply_func() callback: zero a node's discovery count and
 * clear its whole physp_discovered array (physp_tbl_size entries of
 * uint8_t).  The cxt argument is not used.
 */
static void reset_node_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_node_t *node = (osm_node_t *) p_map_item;
	const size_t n_bytes = sizeof(uint8_t) * node->physp_tbl_size;

	memset(node->physp_discovered, 0, n_bytes);
	node->discovery_count = 0;
}
993
/*
 * cl_qmap_apply_func() callback: zero a port's discovery count.
 * The cxt argument is not used.
 */
static void reset_port_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_port_t *port = (osm_port_t *) p_map_item;

	port->discovery_count = 0;
}
999
/*
 * cl_qmap_apply_func() callback: clear a switch's need_update flag.
 * The cxt argument is not used.
 */
static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_switch_t *sw = (osm_switch_t *) p_map_item;

	sw->need_update = 0;
}
1005
/*
 * Run a hop-0 / hop-1 discovery pass on behalf of the PerfMgr.
 *
 * Steps:
 *   1. with the opensm lock held, reset the per-node, per-port and
 *      per-switch discovery state;
 *   2. probe hop 0 and wait for the outstanding transactions;
 *   3. unless the SM port is down, probe hop 1 and wait again;
 *   4. run the drop manager.
 *
 * The drop manager is skipped whenever a probe fails or a wait for
 * pending transactions reports non-zero.
 *
 * Returns the result of the last sweep_hop_*() call made.  A non-zero
 * result from wait_for_pending_transactions() ends the pass early but
 * does not change the returned value.
 */
static int perfmgr_discovery(osm_opensm_t * osm)
{
	int rc;

	CL_PLOCK_ACQUIRE(&osm->lock);
	cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL);
	cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL);
	cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL);
	CL_PLOCK_RELEASE(&osm->lock);

	osm->subn.in_sweep_hop_0 = TRUE;

	rc = sweep_hop_0(&osm->sm);
	if (rc != 0)
		return rc;

	if (wait_for_pending_transactions(&osm->stats))
		return rc;

	if (is_sm_port_down(&osm->sm)) {
		/* nothing beyond hop 0 can be reached; go straight to drop */
		OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n");
	} else {
		osm->subn.in_sweep_hop_0 = FALSE;

		rc = sweep_hop_1(&osm->sm);
		if (rc != 0)
			return rc;

		if (wait_for_pending_transactions(&osm->stats))
			return rc;
	}

	osm_drop_mgr_process(&osm->sm);

	return rc;
}
1045
1046 /**********************************************************************
1047 * Main PerfMgr processor - query the performance counters
1048 **********************************************************************/
osm_perfmgr_process(osm_perfmgr_t * pm)1049 void osm_perfmgr_process(osm_perfmgr_t * pm)
1050 {
1051 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1052 struct timeval before, after;
1053 #endif
1054
1055 if (pm->state != PERFMGR_STATE_ENABLED)
1056 return;
1057
1058 cl_spinlock_acquire(&pm->lock);
1059 if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE ||
1060 pm->sweep_state == PERFMGR_SWEEP_SUSPENDED ||
1061 pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
1062 cl_spinlock_release(&pm->lock);
1063 OSM_LOG(pm->log, OSM_LOG_INFO,
1064 "PM sweep state %d, skipping sweep\n",
1065 pm->sweep_state);
1066 return;
1067 }
1068
1069 pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
1070 cl_spinlock_release(&pm->lock);
1071
1072 if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
1073 pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
1074 perfmgr_discovery(pm->subn->p_osm);
1075
1076 /* if redirection enabled, determine local port */
1077 if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
1078 osm_node_t *p_node;
1079 osm_port_t *p_port;
1080
1081 CL_PLOCK_ACQUIRE(pm->sm->p_lock);
1082 p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
1083 if (p_port) {
1084 p_node = p_port->p_node;
1085 CL_ASSERT(p_node);
1086 pm->local_port =
1087 ib_node_info_get_local_port_num(&p_node->node_info);
1088 } else
1089 OSM_LOG(pm->log, OSM_LOG_ERROR,
1090 "ERR 5487: No PerfMgr port object for "
1091 "port GUID 0x%" PRIx64 "\n",
1092 cl_ntoh64(pm->port_guid));
1093 CL_PLOCK_RELEASE(pm->sm->p_lock);
1094 }
1095
1096 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1097 gettimeofday(&before, NULL);
1098 #endif
1099 /* With the global lock held, collect the node guids */
1100 /* FIXME we should be able to track SA notices
1101 * and not have to sweep the node_guid_tbl each pass
1102 */
1103 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
1104 cl_plock_acquire(&pm->osm->lock);
1105 cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm);
1106 cl_plock_release(&pm->osm->lock);
1107
1108 /* then for each node query their counters */
1109 cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);
1110
1111 /* clean out any nodes found to be removed during the sweep */
1112 remove_marked_nodes(pm);
1113
1114 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1115 gettimeofday(&after, NULL);
1116 diff_time(&before, &after, &after);
1117 osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID,
1118 "PerfMgr total sweep time : %ld.%06ld s\n"
1119 " fastest mad : %g us\n"
1120 " slowest mad : %g us\n"
1121 " average mad : %g us\n",
1122 after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us,
1123 perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us);
1124 clear_mad_stats();
1125 #endif
1126
1127 cl_spinlock_acquire(&pm->lock);
1128 pm->sweep_state = PERFMGR_SWEEP_SLEEP;
1129 cl_spinlock_release(&pm->lock);
1130 }
1131
1132 /**********************************************************************
1133 * PerfMgr timer - loop continuously and signal SM to run PerfMgr
1134 * processor if enabled
1135 **********************************************************************/
perfmgr_sweep(void * arg)1136 static void perfmgr_sweep(void *arg)
1137 {
1138 osm_perfmgr_t *pm = arg;
1139
1140 osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
1141 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
1142 }
1143
/*
 * Shut the PerfMgr down: stop the sweep timer, unregister the
 * dispatcher handle pc_disp_h and unbind from the MAD layer, in that
 * order.  Nothing is freed here; see osm_perfmgr_destroy().
 */
void osm_perfmgr_shutdown(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);
	/* stop the timer first so no further sweeps are signalled */
	cl_timer_stop(&pm->sweep_timer);
	cl_disp_unregister(pm->pc_disp_h);
	perfmgr_mad_unbind(pm);
	OSM_LOG_EXIT(pm->log);
}
1152
/*
 * Destroy the PerfMgr's resources: the performance database and the
 * sweep timer.
 * NOTE(review): presumably expected to run after
 * osm_perfmgr_shutdown() -- confirm against the callers.
 */
void osm_perfmgr_destroy(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);
	perfmgr_db_destroy(pm->db);
	cl_timer_destroy(&pm->sweep_timer);
	OSM_LOG_EXIT(pm->log);
}
1160
1161 /**********************************************************************
1162 * Detect if someone else on the network could have cleared the counters
1163 * without us knowing. This is easy to detect because the counters never
1164 * wrap but are "sticky".
1165 *
1166 * The one time this will not work is if the port is getting errors fast
1167 * enough to have the reading overtake the previous reading. In this case,
1168 * counters will be missed.
1169 **********************************************************************/
perfmgr_check_oob_clear(osm_perfmgr_t * pm,monitored_node_t * mon_node,uint8_t port,perfmgr_db_err_reading_t * cr)1170 static void perfmgr_check_oob_clear(osm_perfmgr_t * pm,
1171 monitored_node_t * mon_node, uint8_t port,
1172 perfmgr_db_err_reading_t * cr)
1173 {
1174 perfmgr_db_err_reading_t prev_err;
1175
1176 if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err)
1177 != PERFMGR_EVENT_DB_SUCCESS) {
1178 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
1179 "error reading for %s (guid 0x%" PRIx64 ") port %u\n",
1180 mon_node->name, mon_node->guid, port);
1181 return;
1182 }
1183
1184 OSM_LOG(pm->log, OSM_LOG_DEBUG,
1185 "Errors vs previous node %s (0x%" PRIx64 ") port %u\n"
1186 "SE: %"PRIu64" ?< %"PRIu64"\n"
1187 "LE: %"PRIu64" ?< %"PRIu64"\n"
1188 "LD: %"PRIu64" ?< %"PRIu64"\n"
1189 "RE: %"PRIu64" ?< %"PRIu64"\n"
1190 "RPE: %"PRIu64" ?< %"PRIu64"\n"
1191 "SRE: %"PRIu64" ?< %"PRIu64"\n"
1192 "XD: %"PRIu64" ?< %"PRIu64"\n"
1193 "XCE: %"PRIu64" ?< %"PRIu64"\n"
1194 "RCE: %"PRIu64" ?< %"PRIu64"\n"
1195 "LI: %"PRIu64" ?< %"PRIu64"\n"
1196 "BO: %"PRIu64" ?< %"PRIu64"\n"
1197 "VL15: %"PRIu64" ?< %"PRIu64"\n"
1198 "XW: %"PRIu64" ?< %"PRIu64"\n"
1199 ,
1200 mon_node->name, mon_node->guid, port,
1201 cr->symbol_err_cnt, prev_err.symbol_err_cnt,
1202 cr->link_err_recover, prev_err.link_err_recover,
1203 cr->link_downed, prev_err.link_downed,
1204 cr->rcv_err, prev_err.rcv_err,
1205 cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err,
1206 cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err,
1207 cr->xmit_discards, prev_err.xmit_discards,
1208 cr->xmit_constraint_err, prev_err.xmit_constraint_err,
1209 cr->rcv_constraint_err, prev_err.rcv_constraint_err,
1210 cr->link_integrity, prev_err.link_integrity,
1211 cr->buffer_overrun, prev_err.buffer_overrun,
1212 cr->vl15_dropped, prev_err.vl15_dropped,
1213 cr->xmit_wait, prev_err.xmit_wait);
1214
1215 if (cr->symbol_err_cnt < prev_err.symbol_err_cnt ||
1216 cr->link_err_recover < prev_err.link_err_recover ||
1217 cr->link_downed < prev_err.link_downed ||
1218 cr->rcv_err < prev_err.rcv_err ||
1219 cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err ||
1220 cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err ||
1221 cr->xmit_discards < prev_err.xmit_discards ||
1222 cr->xmit_constraint_err < prev_err.xmit_constraint_err ||
1223 cr->rcv_constraint_err < prev_err.rcv_constraint_err ||
1224 cr->link_integrity < prev_err.link_integrity ||
1225 cr->buffer_overrun < prev_err.buffer_overrun ||
1226 cr->vl15_dropped < prev_err.vl15_dropped ||
1227 cr->xmit_wait < prev_err.xmit_wait) {
1228 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: "
1229 "Detected an out of band error clear "
1230 "on %s (0x%" PRIx64 ") port %u\n",
1231 mon_node->name, mon_node->guid, port);
1232 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
1233 }
1234 }
1235
/**********************************************************************
 * counter_overflow_N(): return 1 if an N-bit counter value is "close"
 * to overflowing, 0 otherwise.
 *
 * For the 8/16/32/64 bit variants "close" means within the top 25% of
 * the counter range, i.e. val >= MAX - MAX / 4.  The 4-bit variant
 * uses a fixed threshold of 10.
 **********************************************************************/
static int counter_overflow_4(uint8_t val)
{
	if (val < 10)
		return 0;

	return 1;
}
1244
/*
 * Return 1 if an 8-bit counter is within the top 25% of its range
 * (val >= UINT8_MAX - UINT8_MAX / 4, i.e. 192), 0 otherwise.
 */
static int counter_overflow_8(uint8_t val)
{
	const unsigned threshold = UINT8_MAX - (UINT8_MAX / 4);

	return val >= threshold ? 1 : 0;
}
1249
/*
 * Return 1 if a 16-bit counter is within the top 25% of its range,
 * 0 otherwise.  val is converted with cl_ntoh16() before the
 * comparison.
 */
static int counter_overflow_16(ib_net16_t val)
{
	const uint16_t host_val = cl_ntoh16(val);

	return host_val >= (UINT16_MAX - (UINT16_MAX / 4)) ? 1 : 0;
}
1254
/*
 * Return 1 if a 32-bit counter is within the top 25% of its range,
 * 0 otherwise.  val is converted with cl_ntoh32() before the
 * comparison.
 */
static int counter_overflow_32(ib_net32_t val)
{
	const uint32_t host_val = cl_ntoh32(val);

	return host_val >= (UINT32_MAX - (UINT32_MAX / 4)) ? 1 : 0;
}
1259
/*
 * Return 1 if a 64-bit counter is within the top 25% of its range,
 * 0 otherwise.  val is converted with cl_ntoh64() before the
 * comparison.
 */
static int counter_overflow_64(ib_net64_t val)
{
	const uint64_t host_val = cl_ntoh64(val);

	return host_val >= (UINT64_MAX - (UINT64_MAX / 4)) ? 1 : 0;
}
1264
1265 /**********************************************************************
1266 * Check if the port counters have overflowed and if so issue a clear
1267 * MAD to the port
1268 **********************************************************************/
perfmgr_check_overflow(osm_perfmgr_t * pm,monitored_node_t * mon_node,int16_t pkey_ix,uint8_t port,ib_port_counters_t * pc,boolean_t xmit_wait_sup)1269 static void perfmgr_check_overflow(osm_perfmgr_t * pm,
1270 monitored_node_t * mon_node, int16_t pkey_ix,
1271 uint8_t port, ib_port_counters_t * pc,
1272 boolean_t xmit_wait_sup)
1273 {
1274 osm_madw_context_t mad_context;
1275 ib_api_status_t status;
1276 ib_net32_t remote_qp;
1277 uint16_t counter_select;
1278 uint8_t counter_select2;
1279
1280 OSM_LOG_ENTER(pm->log);
1281
1282 if (counter_overflow_16(pc->symbol_err_cnt) ||
1283 counter_overflow_8(pc->link_err_recover) ||
1284 counter_overflow_8(pc->link_downed) ||
1285 counter_overflow_16(pc->rcv_err) ||
1286 counter_overflow_16(pc->rcv_rem_phys_err) ||
1287 counter_overflow_16(pc->rcv_switch_relay_err) ||
1288 counter_overflow_16(pc->xmit_discards) ||
1289 counter_overflow_8(pc->xmit_constraint_err) ||
1290 counter_overflow_8(pc->rcv_constraint_err) ||
1291 counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) ||
1292 counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) ||
1293 counter_overflow_16(pc->vl15_dropped) ||
1294 (xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) ||
1295 (!pce_supported(mon_node, port) &&
1296 (counter_overflow_32(pc->xmit_data) ||
1297 counter_overflow_32(pc->rcv_data) ||
1298 counter_overflow_32(pc->xmit_pkts) ||
1299 counter_overflow_32(pc->rcv_pkts)))) {
1300 osm_node_t *p_node = NULL;
1301 ib_net16_t lid = 0;
1302
1303 if (!mon_node->port[port].valid)
1304 goto Exit;
1305
1306 osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID,
1307 "PerfMgr: Counter overflow: %s (0x%" PRIx64
1308 ") port %d; clearing counters\n",
1309 mon_node->name, mon_node->guid, port);
1310
1311 cl_plock_acquire(&pm->osm->lock);
1312 p_node =
1313 osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
1314 if (!p_node) {
1315 OSM_LOG(pm->log, OSM_LOG_ERROR,
1316 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64
1317 ") no longer exists so removing from PerfMgr"
1318 " monitoring\n",
1319 mon_node->name, mon_node->guid);
1320 goto Exit;
1321 }
1322 lid = get_lid(p_node, port, mon_node);
1323 cl_plock_release(&pm->osm->lock);
1324 if (lid == 0) {
1325 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: "
1326 "Failed to clear counters for %s (0x%"
1327 PRIx64 ") port %d; failed to get lid\n",
1328 mon_node->name, mon_node->guid, port);
1329 goto Exit;
1330 }
1331
1332 remote_qp = get_qp(NULL, port);
1333
1334 mad_context.perfmgr_context.node_guid = mon_node->guid;
1335 mad_context.perfmgr_context.port = port;
1336 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
1337
1338 /* apparently some HW uses the same counters for the 32 and 64
1339 * bit versions and a clear of them in the PortCounters
1340 * attribute also clears the ExtendedPortCounters equivalant
1341 * counters
1342 */
1343 if (pce_supported(mon_node, port))
1344 counter_select = 0x0fff;
1345 else
1346 counter_select = 0xffff;
1347
1348 if (xmit_wait_sup)
1349 counter_select2 = 1;
1350 else
1351 counter_select2 = 0;
1352
1353 status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
1354 port, IB_MAD_METHOD_SET,
1355 counter_select,
1356 counter_select2,
1357 &mad_context,
1358 0); /* FIXME SL != 0 */
1359 if (status != IB_SUCCESS)
1360 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
1361 "Failed to send clear counters MAD for %s (0x%"
1362 PRIx64 ") port %d\n",
1363 mon_node->name, mon_node->guid, port);
1364
1365 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
1366 if (!pce_supported(mon_node, port))
1367 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1368 }
1369
1370 Exit:
1371 OSM_LOG_EXIT(pm->log);
1372 }
1373
1374 /**********************************************************************
1375 * Check if the port counters have overflowed and if so issue a clear
1376 * MAD to the port
1377 **********************************************************************/
perfmgr_check_pce_overflow(osm_perfmgr_t * pm,monitored_node_t * mon_node,int16_t pkey_ix,uint8_t port,ib_port_counters_ext_t * pc)1378 static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
1379 monitored_node_t * mon_node,
1380 int16_t pkey_ix,
1381 uint8_t port,
1382 ib_port_counters_ext_t * pc)
1383 {
1384 osm_madw_context_t mad_context;
1385 ib_api_status_t status;
1386 ib_net32_t remote_qp;
1387
1388 OSM_LOG_ENTER(pm->log);
1389
1390 if (counter_overflow_64(pc->xmit_data) ||
1391 counter_overflow_64(pc->rcv_data) ||
1392 counter_overflow_64(pc->xmit_pkts) ||
1393 counter_overflow_64(pc->rcv_pkts) ||
1394 (ietf_supported(mon_node, port) &&
1395 (counter_overflow_64(pc->unicast_xmit_pkts) ||
1396 counter_overflow_64(pc->unicast_rcv_pkts) ||
1397 counter_overflow_64(pc->multicast_xmit_pkts) ||
1398 counter_overflow_64(pc->multicast_rcv_pkts)))) {
1399 osm_node_t *p_node = NULL;
1400 ib_net16_t lid = 0;
1401
1402 if (!mon_node->port[port].valid)
1403 goto Exit;
1404
1405 osm_log(pm->log, OSM_LOG_VERBOSE,
1406 "PerfMgr: PortCountersExtended overflow: %s (0x%"
1407 PRIx64 ") port %d; clearing counters\n",
1408 mon_node->name, mon_node->guid, port);
1409
1410 cl_plock_acquire(&pm->osm->lock);
1411 p_node =
1412 osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
1413 if (!p_node) {
1414 OSM_LOG(pm->log, OSM_LOG_ERROR,
1415 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64
1416 ") no longer exists so removing from PerfMgr"
1417 " monitoring\n",
1418 mon_node->name, mon_node->guid);
1419 goto Exit;
1420 }
1421 lid = get_lid(p_node, port, mon_node);
1422 cl_plock_release(&pm->osm->lock);
1423 if (lid == 0) {
1424 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: "
1425 "Failed to clear counters for %s (0x%"
1426 PRIx64 ") port %d; failed to get lid\n",
1427 mon_node->name, mon_node->guid, port);
1428 goto Exit;
1429 }
1430
1431 remote_qp = get_qp(NULL, port);
1432
1433 mad_context.perfmgr_context.node_guid = mon_node->guid;
1434 mad_context.perfmgr_context.port = port;
1435 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
1436 /* clear port counters */
1437 status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix,
1438 port, IB_MAD_METHOD_SET,
1439 &mad_context,
1440 0); /* FIXME SL != 0 */
1441 if (status != IB_SUCCESS)
1442 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: "
1443 "Failed to send clear counters MAD for %s (0x%"
1444 PRIx64 ") port %d\n",
1445 mon_node->name, mon_node->guid, port);
1446
1447 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1448 }
1449
1450 Exit:
1451 OSM_LOG_EXIT(pm->log);
1452 }
1453
1454 /**********************************************************************
1455 * Check values for logging of errors
1456 **********************************************************************/
perfmgr_log_errors(osm_perfmgr_t * pm,monitored_node_t * mon_node,uint8_t port,perfmgr_db_err_reading_t * reading)1457 static void perfmgr_log_errors(osm_perfmgr_t * pm,
1458 monitored_node_t * mon_node, uint8_t port,
1459 perfmgr_db_err_reading_t * reading)
1460 {
1461 perfmgr_db_err_reading_t prev_read;
1462 perfmgr_db_err_t err =
1463 perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read);
1464 uint64_t cur, prev;
1465
1466 if (err != PERFMGR_EVENT_DB_SUCCESS) {
1467 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
1468 "reading for %s (0x%" PRIx64 ") port %u\n",
1469 mon_node->name, mon_node->guid, port);
1470 return;
1471 }
1472
1473 #define LOG_ERR_CNT(errname, errnum, counter_name) \
1474 if (reading->counter_name > prev_read.counter_name) { \
1475 if (mon_node->port[port].remote_valid == TRUE) \
1476 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
1477 "%s : %" PRIu64 " : node " \
1478 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \
1479 "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
1480 errnum, errname, \
1481 reading->counter_name - prev_read.counter_name, \
1482 mon_node->name, mon_node->guid, port, \
1483 mon_node->port[port].remote_name, \
1484 mon_node->port[port].remote_guid, \
1485 mon_node->port[port].remote_port); \
1486 else \
1487 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
1488 "%s : %" PRIu64 " : node " \
1489 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
1490 errnum, errname, \
1491 reading->counter_name - prev_read.counter_name, \
1492 mon_node->name, mon_node->guid, port); \
1493 }
1494
1495 LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt);
1496 LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover);
1497 LOG_ERR_CNT("LinkDownedCounter", "5433", link_downed);
1498 LOG_ERR_CNT("PortRcvErrors", "5434", rcv_err);
1499 LOG_ERR_CNT("PortRcvRemotePhysicalErrors", "5435", rcv_rem_phys_err);
1500 LOG_ERR_CNT("PortRcvSwitchRelayErrors", "5436", rcv_switch_relay_err);
1501 LOG_ERR_CNT("PortXmitDiscards", "5437", xmit_discards);
1502 LOG_ERR_CNT("PortXmitConstraintErrors", "5438", xmit_constraint_err);
1503 LOG_ERR_CNT("PortRcvConstraintErrors", "5439", rcv_constraint_err);
1504 LOG_ERR_CNT("LocalLinkIntegrityErrors", "543A", link_integrity);
1505 LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun);
1506 LOG_ERR_CNT("VL15Dropped", "543C", vl15_dropped);
1507
1508 cur = reading->xmit_wait;
1509 prev = prev_read.xmit_wait;
1510 if (pm->xmit_wait_log && cur > prev &&
1511 (cur - prev) >= pm->xmit_wait_threshold) {
1512 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64
1513 " : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n",
1514 cur - prev, mon_node->name, mon_node->guid, port);
1515 }
1516 }
1517
/*
 * Look up the P_Key table index of a redirection P_Key on the PerfMgr's
 * own port.
 *
 * pm   - PerfMgr object (pm->port_guid identifies the local port)
 * pkey - P_Key taken from the redirect response; cl_map_get() is keyed
 *        by ib_pkey_get_base(pkey)
 *
 * Returns block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index of the matching
 * table entry, or -1 when the port object is missing, its physp is
 * invalid, the P_Key is not in the table, or block/index could not be
 * obtained.
 *
 * All table accesses happen between CL_PLOCK_ACQUIRE and
 * CL_PLOCK_RELEASE on pm->sm->p_lock; the lock is released before any
 * message is logged.
 */
static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey)
{
	int16_t pkey_ix = -1;
	osm_port_t *p_port;
	osm_pkey_tbl_t *p_pkey_tbl;
	ib_net16_t *p_orig_pkey;
	ib_api_status_t status;
	uint16_t block;
	uint8_t index;

	OSM_LOG_ENTER(pm->log);

	CL_PLOCK_ACQUIRE(pm->sm->p_lock);
	p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
	if (!p_port) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 541E: No PerfMgr port object\n");
		goto Exit;
	}

	if (!p_port->p_physp || !osm_physp_is_valid(p_port->p_physp)) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 5420: Local PerfMgt port physp invalid\n");
		goto Exit;
	}

	/*
	 * The table is a member of the physp, so its address cannot be
	 * NULL once p_physp has been checked; no separate test is needed.
	 */
	p_pkey_tbl = &p_port->p_physp->pkeys;

	p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, ib_pkey_get_base(pkey));
	if (!p_orig_pkey) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_VERBOSE,
			"PKey 0x%x not found for PerfMgr port\n",
			cl_ntoh16(pkey));
		goto Exit;
	}

	status = osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
						&block, &index);
	CL_PLOCK_RELEASE(pm->sm->p_lock);

	if (status == IB_SUCCESS)
		pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index;
	else
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 541F: Failed to obtain P_Key 0x%04x "
			"block and index for PerfMgr port\n",
			cl_ntoh16(pkey));

Exit:
	OSM_LOG_EXIT(pm->log);
	return pkey_ix;
}
1575
handle_redirect(osm_perfmgr_t * pm,ib_class_port_info_t * cpi,monitored_node_t * p_mon_node,uint8_t port,osm_madw_context_t * mad_context)1576 static boolean_t handle_redirect(osm_perfmgr_t *pm,
1577 ib_class_port_info_t *cpi,
1578 monitored_node_t *p_mon_node,
1579 uint8_t port,
1580 osm_madw_context_t *mad_context)
1581 {
1582 char gid_str[INET6_ADDRSTRLEN];
1583 ib_api_status_t status;
1584 boolean_t valid = TRUE;
1585 int16_t pkey_ix = 0;
1586 uint8_t mad_method;
1587
1588 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1589 "Redirection to LID %u GID %s QP 0x%x received\n",
1590 cl_ntoh16(cpi->redir_lid),
1591 inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
1592 sizeof gid_str), cl_ntoh32(cpi->redir_qp));
1593
1594 if (!pm->subn->opt.perfmgr_redir) {
1595 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1596 "Redirection requested but disabled\n");
1597 valid = FALSE;
1598 }
1599
1600 /* valid redirection ? */
1601 if (cpi->redir_lid == 0) {
1602 if (!ib_gid_is_notzero(&cpi->redir_gid)) {
1603 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1604 "Invalid redirection "
1605 "(both redirect LID and GID are zero)\n");
1606 valid = FALSE;
1607 }
1608 }
1609 if (cpi->redir_qp == 0) {
1610 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n");
1611 valid = FALSE;
1612 }
1613 if (cpi->redir_pkey == 0) {
1614 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n");
1615 valid = FALSE;
1616 }
1617 if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
1618 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n");
1619 valid = FALSE;
1620 }
1621
1622 pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey);
1623 if (pkey_ix == -1) {
1624 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1625 "Index for Pkey 0x%x not found\n",
1626 cl_ntoh16(cpi->redir_pkey));
1627 valid = FALSE;
1628 }
1629
1630 if (cpi->redir_lid == 0) {
1631 /* GID redirection: get PathRecord information */
1632 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1633 "GID redirection not currently supported\n");
1634 goto Exit;
1635 }
1636
1637 if (!valid)
1638 goto Exit;
1639
1640 /* LID redirection support (easier than GID redirection) */
1641 cl_plock_acquire(&pm->osm->lock);
1642 p_mon_node->port[port].redirection = TRUE;
1643 p_mon_node->port[port].valid = valid;
1644 memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid,
1645 sizeof(ib_gid_t));
1646 p_mon_node->port[port].lid = cpi->redir_lid;
1647 p_mon_node->port[port].qp = cpi->redir_qp;
1648 p_mon_node->port[port].pkey = cpi->redir_pkey;
1649 if (pkey_ix != -1)
1650 p_mon_node->port[port].pkey_ix = pkey_ix;
1651 cl_plock_release(&pm->osm->lock);
1652
1653 /* either */
1654 if (pm->query_cpi)
1655 {
1656 /* issue a CPI query to the redirected location */
1657 mad_method = IB_MAD_METHOD_GET;
1658 p_mon_node->port[port].cpi_valid = FALSE;
1659 status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
1660 cpi->redir_qp, pkey_ix,
1661 port, mad_context,
1662 0); /* FIXME SL != 0 */
1663 } else {
1664 /* reissue the original query to the redirected location */
1665 uint8_t counter_select2;
1666
1667 if (xmit_wait_supported(p_mon_node, port))
1668 counter_select2 = 1;
1669 else
1670 counter_select2 = 0;
1671
1672 mad_method = mad_context->perfmgr_context.mad_method;
1673 if (mad_context->perfmgr_context.mad_attr_id
1674 == IB_MAD_ATTR_PORT_CNTRS) {
1675 status = perfmgr_send_pc_mad(pm, cpi->redir_lid,
1676 cpi->redir_qp,
1677 pkey_ix, port,
1678 mad_method,
1679 0xffff,
1680 counter_select2,
1681 mad_context,
1682 0); /* FIXME SL != 0 */
1683 } else {
1684 status = perfmgr_send_pce_mad(pm, cpi->redir_lid,
1685 cpi->redir_qp,
1686 pkey_ix, port,
1687 mad_method,
1688 mad_context,
1689 0); /* FIXME SL != 0 */
1690 }
1691 }
1692 if (status != IB_SUCCESS)
1693 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
1694 "Failed to send redirected MAD "
1695 "with method 0x%x for node %s "
1696 "(NodeGuid 0x%" PRIx64 ") port %d\n",
1697 mad_method, p_mon_node->name, p_mon_node->guid, port);
1698 Exit:
1699 return (valid);
1700 }
1701
1702 /**********************************************************************
1703 * Detect if someone else on the network could have cleared the counters
1704 * without us knowing. This is easy to detect because the counters never
1705 * wrap but are "sticky" PortCountersExtended version.
1706 *
1707 * The one time this will not work is if the port is getting errors fast
1708 * enough to have the reading overtake the previous reading. In this case,
1709 * counters will be missed.
1710 **********************************************************************/
perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm,monitored_node_t * mon_node,uint8_t port,perfmgr_db_data_cnt_reading_t * dc)1711 static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm,
1712 monitored_node_t * mon_node,
1713 uint8_t port,
1714 perfmgr_db_data_cnt_reading_t * dc)
1715 {
1716 perfmgr_db_data_cnt_reading_t prev_dc;
1717
1718 if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc)
1719 != PERFMGR_EVENT_DB_SUCCESS) {
1720 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1721 "Failed to find previous data count "
1722 "reading for %s (0x%" PRIx64 ") port %u\n",
1723 mon_node->name, mon_node->guid, port);
1724 return;
1725 }
1726
1727 OSM_LOG(pm->log, OSM_LOG_DEBUG,
1728 "Data vs previous node %s (0x%" PRIx64 ") port %u\n"
1729 "TX: %"PRIu64" ?< %"PRIu64"\n"
1730 "RX: %"PRIu64" ?< %"PRIu64"\n"
1731 "TXP: %"PRIu64" ?< %"PRIu64"\n"
1732 "RXP: %"PRIu64" ?< %"PRIu64"\n"
1733 "UTXP: %"PRIu64" ?< %"PRIu64"\n"
1734 "URXP: %"PRIu64" ?< %"PRIu64"\n"
1735 "MTXP: %"PRIu64" ?< %"PRIu64"\n"
1736 "MRXP: %"PRIu64" ?< %"PRIu64"\n"
1737 ,
1738 mon_node->name, mon_node->guid, port,
1739 dc->xmit_data, prev_dc.xmit_data,
1740 dc->rcv_data, prev_dc.rcv_data,
1741 dc->xmit_pkts, prev_dc.xmit_pkts,
1742 dc->rcv_pkts, prev_dc.rcv_pkts,
1743 dc->unicast_xmit_pkts, prev_dc.unicast_xmit_pkts,
1744 dc->unicast_rcv_pkts, prev_dc.unicast_rcv_pkts,
1745 dc->multicast_xmit_pkts, prev_dc.multicast_xmit_pkts,
1746 dc->multicast_rcv_pkts, prev_dc.multicast_rcv_pkts);
1747
1748 if (dc->xmit_data < prev_dc.xmit_data ||
1749 dc->rcv_data < prev_dc.rcv_data ||
1750 dc->xmit_pkts < prev_dc.xmit_pkts ||
1751 dc->rcv_pkts < prev_dc.rcv_pkts ||
1752 (ietf_supported(mon_node, port) &&
1753 (dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts ||
1754 dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts ||
1755 dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts ||
1756 dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts))) {
1757 OSM_LOG(pm->log, OSM_LOG_ERROR,
1758 "PerfMgr: ERR 540B: Detected an out of band data counter "
1759 "clear on node %s (0x%" PRIx64 ") port %u\n",
1760 mon_node->name, mon_node->guid, port);
1761
1762 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1763 }
1764 }
1765
1766 /**********************************************************************
1767 * The dispatcher uses a thread pool which will call this function when
1768 * there is a thread available to process the mad received on the wire
1769 **********************************************************************/
pc_recv_process(void * context,void * data)1770 static void pc_recv_process(void *context, void *data)
1771 {
1772 osm_perfmgr_t *pm = context;
1773 osm_madw_t *p_madw = data;
1774 osm_madw_context_t *mad_context = &p_madw->context;
1775 ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
1776 uint64_t node_guid = mad_context->perfmgr_context.node_guid;
1777 uint8_t port = mad_context->perfmgr_context.port;
1778 perfmgr_db_err_reading_t err_reading;
1779 perfmgr_db_data_cnt_reading_t data_reading;
1780 cl_map_item_t *p_node;
1781 monitored_node_t *p_mon_node;
1782 ib_class_port_info_t *cpi = NULL;
1783
1784 OSM_LOG_ENTER(pm->log);
1785
1786 /*
1787 * get the monitored node struct to have the printable name
1788 * for log messages
1789 */
1790 if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
1791 cl_qmap_end(&pm->monitored_map)) {
1792 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016"
1793 PRIx64 " not found in monitored map\n", node_guid);
1794 goto Exit;
1795 }
1796 p_mon_node = (monitored_node_t *) p_node;
1797
1798 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1799 "Processing received MAD status 0x%x context 0x%"
1800 PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port);
1801
1802 CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
1803 p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT ||
1804 p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
1805
1806 cl_plock_acquire(&pm->osm->lock);
1807 /* validate port number */
1808 if (port >= p_mon_node->num_ports) {
1809 cl_plock_release(&pm->osm->lock);
1810 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
1811 "Invalid port num %d for GUID 0x%016"
1812 PRIx64 " num ports %d\n", port, node_guid,
1813 p_mon_node->num_ports);
1814 goto Exit;
1815 }
1816 cl_plock_release(&pm->osm->lock);
1817
1818 /* capture CLASS_PORT_INFO data */
1819 if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
1820 boolean_t cpi_valid = TRUE;
1821
1822 cpi = (ib_class_port_info_t *) &
1823 (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
1824
1825 /* Response could be redirection (IBM eHCA PMA does this) */
1826 if (p_mad->status & IB_MAD_STATUS_REDIRECT)
1827 cpi_valid = handle_redirect(pm, cpi, p_mon_node, port,
1828 mad_context);
1829
1830 if (pm->query_cpi && cpi_valid) {
1831 cl_plock_acquire(&pm->osm->lock);
1832 if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
1833 int i;
1834 for (i = p_mon_node->esp0 ? 0 : 1;
1835 i < p_mon_node->num_ports;
1836 i++) {
1837 p_mon_node->port[i].cap_mask = cpi->cap_mask;
1838 p_mon_node->port[i].cpi_valid = cpi_valid;
1839 }
1840 } else {
1841 p_mon_node->port[port].cap_mask = cpi->cap_mask;
1842 p_mon_node->port[port].cpi_valid = cpi_valid;
1843 }
1844 cl_plock_release(&pm->osm->lock);
1845 }
1846 goto Exit;
1847 }
1848
1849 if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) {
1850 ib_port_counters_ext_t *ext_wire_read =
1851 (ib_port_counters_ext_t *)
1852 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
1853
1854 /* convert wire data to perfmgr data counter reading */
1855 perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading,
1856 ietf_supported(p_mon_node,
1857 port));
1858
1859 /* add counter */
1860 if (mad_context->perfmgr_context.mad_method
1861 == IB_MAD_METHOD_GET) {
1862 /* detect an out of band clear on the port */
1863 perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
1864 &data_reading);
1865
1866 perfmgr_db_add_dc_reading(pm->db, node_guid, port,
1867 &data_reading,
1868 ietf_supported(p_mon_node,
1869 port));
1870 } else {
1871 perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
1872 }
1873
1874 perfmgr_check_pce_overflow(pm, p_mon_node,
1875 p_mon_node->port[port].pkey_ix,
1876 port, ext_wire_read);
1877 } else {
1878 boolean_t pce_sup = pce_supported(p_mon_node, port);
1879 boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port);
1880 ib_port_counters_t *wire_read =
1881 (ib_port_counters_t *)
1882 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
1883
1884 perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup);
1885 if (!pce_sup)
1886 perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading);
1887
1888 if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) {
1889 /* detect an out of band clear on the port */
1890 perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading);
1891 if (!pce_sup)
1892 perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
1893 &data_reading);
1894
1895 /* log errors from this reading */
1896 if (pm->subn->opt.perfmgr_log_errors)
1897 perfmgr_log_errors(pm, p_mon_node, port, &err_reading);
1898
1899 perfmgr_db_add_err_reading(pm->db, node_guid, port,
1900 &err_reading);
1901 if (!pce_sup)
1902 perfmgr_db_add_dc_reading(pm->db, node_guid, port,
1903 &data_reading, 0);
1904 } else {
1905 perfmgr_db_clear_prev_err(pm->db, node_guid, port);
1906 if (!pce_sup)
1907 perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
1908 }
1909
1910 perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix,
1911 port, wire_read, xmit_wait_sup);
1912
1913 }
1914
1915 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1916 do {
1917 struct timeval proc_time;
1918 gettimeofday(&proc_time, NULL);
1919 diff_time(&p_madw->context.perfmgr_context.query_start,
1920 &proc_time, &proc_time);
1921 update_mad_stats(&proc_time);
1922 } while (0);
1923 #endif
1924
1925 Exit:
1926 osm_mad_pool_put(pm->mad_pool, p_madw);
1927
1928 OSM_LOG_EXIT(pm->log);
1929 }
1930
1931 /**********************************************************************
1932 * Initialize the PerfMgr object
1933 **********************************************************************/
osm_perfmgr_init(osm_perfmgr_t * pm,osm_opensm_t * osm,const osm_subn_opt_t * p_opt)1934 ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm,
1935 const osm_subn_opt_t * p_opt)
1936 {
1937 ib_api_status_t status;
1938
1939 OSM_LOG_ENTER(&osm->log);
1940
1941 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n");
1942
1943 memset(pm, 0, sizeof(*pm));
1944
1945 pm->subn = &osm->subn;
1946 pm->sm = &osm->sm;
1947 pm->log = &osm->log;
1948 pm->mad_pool = &osm->mad_pool;
1949 pm->vendor = osm->p_vendor;
1950 pm->trans_id = PERFMGR_INITIAL_TID_VALUE;
1951 pm->state =
1952 p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE;
1953 pm->sweep_state = PERFMGR_SWEEP_SLEEP;
1954 status = cl_spinlock_init(&pm->lock);
1955 if (status != IB_SUCCESS)
1956 goto Exit;
1957 pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
1958 pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
1959 pm->ignore_cas = p_opt->perfmgr_ignore_cas;
1960 pm->osm = osm;
1961 pm->local_port = -1;
1962
1963 status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
1964 if (status != IB_SUCCESS)
1965 goto Exit;
1966
1967 status = IB_INSUFFICIENT_RESOURCES;
1968 pm->db = perfmgr_db_construct(pm);
1969 if (!pm->db) {
1970 pm->state = PERFMGR_STATE_NO_DB;
1971 goto Exit;
1972 }
1973
1974 pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS,
1975 pc_recv_process, pm);
1976 if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) {
1977 perfmgr_db_destroy(pm->db);
1978 goto Exit;
1979 }
1980
1981 init_monitored_nodes(pm);
1982
1983 if (pm->state == PERFMGR_STATE_ENABLED)
1984 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
1985
1986 pm->rm_nodes = p_opt->perfmgr_rm_nodes;
1987 pm->query_cpi = p_opt->perfmgr_query_cpi;
1988 pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log;
1989 pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold;
1990 status = IB_SUCCESS;
1991 Exit:
1992 OSM_LOG_EXIT(pm->log);
1993 return status;
1994 }
1995
1996 /**********************************************************************
1997 * Clear the counters from the db
1998 **********************************************************************/
osm_perfmgr_clear_counters(osm_perfmgr_t * pm)1999 void osm_perfmgr_clear_counters(osm_perfmgr_t * pm)
2000 {
2001 /**
2002 * FIXME todo issue clear on the fabric?
2003 */
2004 perfmgr_db_clear_counters(pm->db);
2005 osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n");
2006 }
2007
2008 /*******************************************************************
2009 * Dump the DB information to the file specified
2010 *******************************************************************/
osm_perfmgr_dump_counters(osm_perfmgr_t * pm,perfmgr_db_dump_t dump_type)2011 void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type)
2012 {
2013 char path[256];
2014 char *file_name;
2015 if (pm->subn->opt.event_db_dump_file)
2016 file_name = pm->subn->opt.event_db_dump_file;
2017 else {
2018 snprintf(path, sizeof(path), "%s/%s",
2019 pm->subn->opt.dump_files_dir,
2020 OSM_PERFMGR_DEFAULT_DUMP_FILE);
2021 file_name = path;
2022 }
2023 if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0)
2024 OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s",
2025 file_name, strerror(errno));
2026 }
2027
2028 /*******************************************************************
2029 * Print the DB information to the fp specified
2030 *******************************************************************/
osm_perfmgr_print_counters(osm_perfmgr_t * pm,char * nodename,FILE * fp,char * port,int err_only)2031 void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp,
2032 char *port, int err_only)
2033 {
2034 if (nodename) {
2035 char *end = NULL;
2036 uint64_t guid = strtoull(nodename, &end, 0);
2037 if (nodename + strlen(nodename) != end)
2038 perfmgr_db_print_by_name(pm->db, nodename, fp, port,
2039 err_only);
2040 else
2041 perfmgr_db_print_by_guid(pm->db, guid, fp, port,
2042 err_only);
2043 } else
2044 perfmgr_db_print_all(pm->db, fp, err_only);
2045 }
2046
/*******************************************************************
 * Update the name stored in the db for the given node GUID
 *
 * Does nothing when the PerfMgr has no database.
 *******************************************************************/
void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid,
				 char *nodename)
{
	if (!pm->db)
		return;

	perfmgr_db_update_name(pm->db, node_guid, nodename);
}
2053 #endif /* ENABLE_OSM_PERF_MGR */
2054