1 /*
2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37 /*
38 * Abstract:
39 * Implementation of osm_trap_rcv_t.
40 * This object represents the Trap Receiver object.
41 * This object is part of the opensm family of objects.
42 */
43
44 #if HAVE_CONFIG_H
45 # include <config.h>
46 #endif /* HAVE_CONFIG_H */
47
48 #include <string.h>
49 #include <iba/ib_types.h>
50 #include <complib/cl_qmap.h>
51 #include <complib/cl_debug.h>
52 #include <opensm/osm_file_ids.h>
53 #define FILE_ID OSM_FILE_TRAP_RCV_C
54 #include <opensm/osm_madw.h>
55 #include <opensm/osm_log.h>
56 #include <opensm/osm_node.h>
57 #include <opensm/osm_helper.h>
58 #include <opensm/osm_subnet.h>
59 #include <opensm/osm_inform.h>
60 #include <opensm/osm_opensm.h>
61
62 extern void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t *p_physp);
63
64 /**********************************************************************
65 *
66 * TRAP HANDLING:
67 *
68 * Assuming traps can be caused by bad hardware we should provide
69 * a mechanism for filtering their propagation into the actual logic
70 * of OpenSM such that it is not overloaded by them.
71 *
72 * We will provide a trap filtering mechanism with "Aging" capability.
 * This mechanism will track incoming traps, classify them by their
74 * source and content and provide back their age.
75 *
76 * A timer running in the background will toggle a timer counter
77 * that should be referenced by the aging algorithm.
78 * To provide an efficient handling of aging, we also track all traps
79 * in a sorted list by their aging.
80 *
81 * The generic Aging Tracker mechanism is implemented in the
82 * cl_aging_tracker object.
83 *
84 **********************************************************************/
85
get_physp_by_lid_and_num(IN osm_sm_t * sm,IN ib_net16_t lid,IN uint8_t num)86 static osm_physp_t *get_physp_by_lid_and_num(IN osm_sm_t * sm,
87 IN ib_net16_t lid, IN uint8_t num)
88 {
89 osm_port_t *p_port = osm_get_port_by_lid(sm->p_subn, lid);
90 if (!p_port)
91 return NULL;
92
93 if (osm_node_get_num_physp(p_port->p_node) <= num)
94 return NULL;
95
96 return osm_node_get_physp_ptr(p_port->p_node, num);
97 }
98
/*
 * Event wheel callback invoked for a trap key registered by the trap
 * receiver.
 *
 * key      - trap key; bits 32..47 hold the LID (as an ib_net16_t) and
 *            bits 48..55 the port number.
 * num_regs - not used here.
 * context  - the osm_sm_t the key was registered with.
 *
 * If the port can still be found and is currently marked unhealthy, it is
 * marked healthy again. Nothing is done once osm_exit_flag is set.
 *
 * Always returns 0, which asks the tracker to drop the event.
 */
static uint64_t aging_tracker_callback(IN uint64_t key, IN uint32_t num_regs,
				       IN void *context)
{
	osm_sm_t *sm = context;
	ib_net16_t lid;
	uint8_t port_num;
	osm_physp_t *p_physp;

	OSM_LOG_ENTER(sm->p_log);

	if (osm_exit_flag)
		/* We got an exit flag - do nothing, but still balance
		   the function-entry log record. */
		goto Exit;

	lid = (ib_net16_t) ((key & 0x0000FFFF00000000ULL) >> 32);
	port_num = (uint8_t) ((key & 0x00FF000000000000ULL) >> 48);

	CL_PLOCK_ACQUIRE(sm->p_lock);

	/* make sure the physp is still valid */
	p_physp = get_physp_by_lid_and_num(sm, lid, port_num);
	if (!p_physp)
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Cannot find port num:%u with lid:%u\n",
			port_num, cl_ntoh16(lid));
	/* If the health flag was false - set it back to true */
	else if (!osm_physp_is_healthy(p_physp)) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Clearing health bit of port num:%u with lid:%u\n",
			port_num, cl_ntoh16(lid));

		osm_physp_set_health(p_physp, TRUE);
	}

	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	OSM_LOG_EXIT(sm->p_log);

	/* We want to remove the event from the tracker - so
	   need to return zero. */
	return 0;
}
141
142 /**********************************************************************
143 * CRC calculation for notice identification
144 **********************************************************************/
145
#define CRC32_POLYNOMIAL 0xEDB88320L

/*
 * Table-driven CRC over `count` bytes starting at `buffer`.
 *
 * The running value starts at all ones and is returned as accumulated
 * (no final inversion). The 256-entry lookup table is built from
 * CRC32_POLYNOMIAL on the first call and reused afterwards.
 */
static uint32_t trap_calc_crc32(void *buffer, uint32_t count)
{
	/* lookup table, filled in lazily */
	static uint32_t lookup[256];
	static int lookup_ready = 0;
	const unsigned char *cursor = (const unsigned char *)buffer;
	uint32_t remaining;
	uint32_t crc;
	unsigned int byte, bit;

	if (!lookup_ready) {
		for (byte = 0; byte < 256; byte++) {
			uint32_t entry = byte;

			for (bit = 0; bit < 8; bit++) {
				if (entry & 1)
					entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
				else
					entry >>= 1;
			}
			lookup[byte] = entry;
		}
		lookup_ready = 1;
	}

	/* fold the buffer in, one byte at a time */
	crc = 0xFFFFFFFFu;
	for (remaining = count; remaining != 0; remaining--) {
		crc = (crc >> 8) ^ lookup[(crc ^ *cursor) & 0xFF];
		cursor++;
	}

	return crc;
}
183
184 /* The key is created in the following manner:
185 port_num lid crc
186 \______/ \___/ \___/
187 16b 16b 32b
188 */
trap_get_key(IN uint16_t lid,IN uint8_t port_num,IN ib_mad_notice_attr_t * p_ntci)189 static uint64_t trap_get_key(IN uint16_t lid, IN uint8_t port_num,
190 IN ib_mad_notice_attr_t * p_ntci)
191 {
192 uint32_t crc = trap_calc_crc32(p_ntci, sizeof(ib_mad_notice_attr_t));
193 return ((uint64_t) port_num << 48) | ((uint64_t) lid << 32) | crc;
194 }
195
/*
 * Tell whether a repeat count is one worth logging.
 *
 * Trailing decimal zeros are stripped from num_received; the result is 1
 * when what is left is 1, 2 or 5 (so 10, 20, 50, 100, 200, 500, ...), and
 * 0 otherwise.
 */
static int print_num_received(uint32_t num_received)
{
	uint32_t leading = num_received;

	/* drop trailing zeros */
	for (; leading >= 10 && leading % 10 == 0; leading /= 10)
		;

	switch (leading) {
	case 1:
	case 2:
	case 5:
		return 1;
	default:
		return 0;
	}
}
213
/*
 * Send a PortInfo Set that puts a port into link state DOWN and physical
 * state DISABLED.
 *
 * sm - SM object used to issue the request.
 * p  - the noisy physical port. If its remote peer has a shorter directed
 *      route (smaller hop count), the peer is disabled instead.
 *
 * Returns 0 when the request was handed to osm_req_set() successfully and
 * a non-zero value otherwise (the osm_req_set() status, or -1 when the
 * port object needed to fill in the base LID cannot be found).
 */
static int disable_port(osm_sm_t *sm, osm_physp_t *p)
{
	uint8_t payload[IB_SMP_DATA_SIZE];
	osm_madw_context_t context;
	ib_port_info_t *pi = (ib_port_info_t *)payload;
	osm_physp_t *physp0;
	osm_port_t *p_port;
	ib_net64_t m_key;
	ib_api_status_t status;

	/* select the nearest port to master opensm */
	if (p->p_remote_physp &&
	    p->dr_path.hop_count > p->p_remote_physp->dr_path.hop_count)
		p = p->p_remote_physp;

	/* If trap 131, might want to disable peer port if available */
	/* but peer port has been observed not to respond to SM requests */

	/* The whole payload buffer is sent below, so clear it first in case
	   ib_port_info_t does not cover all of it. */
	memset(payload, 0, sizeof(payload));
	memcpy(payload, &p->port_info, sizeof(ib_port_info_t));

	/* Set port to disabled/down */
	ib_port_info_set_port_state(pi, IB_LINK_DOWN);
	ib_port_info_set_port_phys_state(IB_PORT_PHYS_STATE_DISABLED, pi);

	/* Issue set of PortInfo */
	context.pi_context.node_guid = osm_node_get_node_guid(p->p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;
	context.pi_context.active_transition = FALSE;
	context.pi_context.client_rereg = FALSE;

	/* For a non-zero switch port the M_Key is taken from port 0 */
	if (osm_node_get_type(p->p_node) == IB_NODE_TYPE_SWITCH &&
	    osm_physp_get_port_num(p) != 0) {
		physp0 = osm_node_get_physp_ptr(p->p_node, 0);
		m_key = ib_port_info_get_m_key(&physp0->port_info);
	} else
		m_key = ib_port_info_get_m_key(&p->port_info);

	if (osm_node_get_type(p->p_node) != IB_NODE_TYPE_SWITCH) {
		if (!pi->base_lid) {
			p_port = osm_get_port_by_guid(sm->p_subn,
						      osm_physp_get_port_guid(p));
			/* Without the port object there is no LID to put in
			   the request; report failure instead of crashing. */
			if (!p_port)
				return -1;
			pi->base_lid = p_port->lid;
		}
		pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
	}

	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
			     payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
			     cl_hton32(osm_physp_get_port_num(p)),
			     FALSE, m_key,
			     CL_DISP_MSGID_NONE, &context);
	return status;
}
268
/*
 * Log a one-line summary of an incoming notice at ERROR level.
 *
 * p_log      - log object; nothing is done unless ERROR level is active.
 * p_ntci     - the notice attribute being reported.
 * source_lid - LID the trap came from, in network byte order.
 * trans_id   - transaction ID of the trap MAD, in network byte order.
 *
 * Generic notices for traps 129-131 (the three threshold/watchdog traps)
 * also show the port number carried in the notice. Bad P_Key/Q_Key traps
 * get a second line with the key, SL, LIDs and QP numbers.
 */
static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
			  ib_net16_t source_lid, ib_net64_t trans_id)
{
	ib_net16_t trap_num;
	char str[32];

	if (!OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_ERROR))
		return;

	if (!ib_notice_is_generic(p_ntci)) {
		OSM_LOG(p_log, OSM_LOG_ERROR,
			"Received Vendor Notice type:%u vend:0x%06X "
			"dev:%u from LID:%u TID:0x%016" PRIx64 "\n",
			ib_notice_get_type(p_ntci),
			cl_ntoh32(ib_notice_get_vend_id(p_ntci)),
			cl_ntoh16(p_ntci->g_or_v.vend.dev_id),
			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
		return;
	}

	trap_num = p_ntci->g_or_v.generic.trap_num;

	if (trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP) ||
	    trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP) ||
	    trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))
		snprintf(str, sizeof(str), " Port %u",
			 p_ntci->data_details.ntc_129_131.port_num);
	else
		str[0] = '\0';

	OSM_LOG(p_log, OSM_LOG_ERROR,
		"Received Generic Notice type:%u "
		"num:%u (%s) Producer:%u (%s) "
		"from LID:%u%s TID:0x%016" PRIx64 "\n",
		ib_notice_get_type(p_ntci),
		cl_ntoh16(trap_num),
		ib_get_trap_str(trap_num),
		cl_ntoh32(ib_notice_get_prod_type(p_ntci)),
		ib_get_producer_type_str(ib_notice_get_prod_type(p_ntci)),
		cl_ntoh16(source_lid), str, cl_ntoh64(trans_id));

	if (trap_num == CL_HTON16(SM_BAD_PKEY_TRAP) ||
	    trap_num == CL_HTON16(SM_BAD_QKEY_TRAP)) {
		/* In the trap 257/258 data the qp1 word carries the SL in
		   its top bits and the 24-bit QP number in its low bits;
		   the top byte of qp2 is reserved. */
		OSM_LOG(p_log, OSM_LOG_ERROR,
			"Bad %s_Key:0x%x on SL:%d from "
			"LID1:%u QP1:0x%x to "
			"LID2:%u QP2:0x%x\n",
			(trap_num == CL_HTON16(SM_BAD_PKEY_TRAP)) ? "P" : "Q",
			cl_ntoh32(p_ntci->data_details.ntc_257_258.key),
			cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) >> 28,
			cl_ntoh16(p_ntci->data_details.ntc_257_258.lid1),
			cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) & 0xffffff,
			cl_ntoh16(p_ntci->data_details.ntc_257_258.lid2),
			cl_ntoh32(p_ntci->data_details.ntc_257_258.qp2) & 0xffffff);
	}
}
319
/*
 * React to a port that keeps sending the same trap.
 *
 * sm   - SM object.
 * lid  - LID of the port (network byte order, as printed via cl_ntoh16).
 * port - physical port number.
 * num  - how many times the trap has been seen.
 *
 * Returns:
 *   -1  the physical port could not be found;
 *    1  the port was successfully asked to disable itself;
 *    2  the port was healthy and has now been marked unhealthy;
 *    0  the port was already marked unhealthy.
 */
static int shutup_noisy_port(osm_sm_t *sm, ib_net16_t lid, uint8_t port,
			     unsigned num)
{
	osm_physp_t *physp = get_physp_by_lid_and_num(sm, lid, port);

	if (physp == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
			"Failed to find physical port by lid:%u num:%u\n",
			cl_ntoh16(lid), port);
		return -1;
	}

	/* Babbling port policy is on and the disable threshold is reached:
	   try to switch the port off. On failure fall through and treat it
	   like any other noisy port. */
	if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Disabling noisy physical port 0x%016" PRIx64
			": lid %u, num %u\n",
			cl_ntoh64(osm_physp_get_port_guid(physp)),
			cl_ntoh16(lid), port);

		if (disable_port(sm, physp) == 0)
			return 1;

		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
			"Failed to disable noisy physical port 0x%016"
			PRIx64 ": lid %u, num %u\n",
			cl_ntoh64(osm_physp_get_port_guid(physp)),
			cl_ntoh16(lid), port);
	}

	/* Already flagged earlier - nothing new to report */
	if (!osm_physp_is_healthy(physp))
		return 0;

	/* First transition to unhealthy: the caller runs a heavy sweep */
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Marking unhealthy physical port by lid:%u num:%u\n",
		cl_ntoh16(lid), port);
	osm_physp_set_health(physp, FALSE);
	return 2;
}
360
trap_rcv_process_request(IN osm_sm_t * sm,IN const osm_madw_t * p_madw)361 static void trap_rcv_process_request(IN osm_sm_t * sm,
362 IN const osm_madw_t * p_madw)
363 {
364 uint8_t payload[sizeof(ib_mad_notice_attr_t)];
365 ib_smp_t *p_smp;
366 ib_mad_notice_attr_t *p_ntci = (ib_mad_notice_attr_t *) payload;
367 ib_api_status_t status;
368 osm_madw_t tmp_madw; /* we need a copy to last after repress */
369 uint64_t trap_key;
370 uint32_t num_received;
371 osm_physp_t *p_physp;
372 osm_port_t *p_port;
373 ib_net16_t source_lid = 0;
374 boolean_t is_gsi = TRUE;
375 uint8_t port_num = 0;
376 boolean_t physp_change_trap = FALSE;
377 uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT;
378 boolean_t run_heavy_sweep = FALSE;
379 char buf[1024];
380 osm_dr_path_t *p_path;
381 unsigned n;
382
383 OSM_LOG_ENTER(sm->p_log);
384
385 CL_ASSERT(p_madw);
386
387 if (osm_exit_flag)
388 /*
389 We got an exit flag - do nothing
390 Otherwise we start a sweep on the trap 144 caused by
391 cleaning up SM Cap bit...
392 */
393 goto Exit2;
394
395 /* update the is_gsi flag according to the mgmt_class field */
396 if (p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_LID ||
397 p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_DIR)
398 is_gsi = FALSE;
399
400 /* No real need to grab the lock for this function. */
401 memset(payload, 0, sizeof(payload));
402 memset(&tmp_madw, 0, sizeof(tmp_madw));
403
404 p_smp = osm_madw_get_smp_ptr(p_madw);
405
406 if (p_smp->method != IB_MAD_METHOD_TRAP) {
407 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: "
408 "Unsupported method 0x%X\n", p_smp->method);
409 goto Exit2;
410 }
411
412 /*
413 * The NOTICE Attribute is part of the SMP CLASS attributes
414 * As such the actual attribute data resides inside the SMP
415 * payload.
416 */
417
418 memcpy(payload, &p_smp->data, IB_SMP_DATA_SIZE);
419 memcpy(&tmp_madw, p_madw, sizeof(tmp_madw));
420
421 if (is_gsi == FALSE) {
422 /* We are in smi flow */
423 /*
424 * When we receive a TRAP with dlid = 0 - it means it
425 * came from our own node. So we need to fix it.
426 */
427
428 if (p_madw->mad_addr.addr_type.smi.source_lid == 0) {
429 /* Check if the sm_base_lid is 0. If yes - this means
430 that the local lid wasn't configured yet. Don't send
431 a response to the trap. */
432 if (sm->p_subn->sm_base_lid == 0) {
433 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
434 "Received SLID=0 Trap with local LID=0. Ignoring MAD\n");
435 goto Exit2;
436 }
437 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
438 "Received SLID=0 Trap. Using local LID:%u instead\n",
439 cl_ntoh16(sm->p_subn->sm_base_lid));
440 tmp_madw.mad_addr.addr_type.smi.source_lid =
441 sm->p_subn->sm_base_lid;
442 }
443
444 source_lid = tmp_madw.mad_addr.addr_type.smi.source_lid;
445
446 /* Print some info about the incoming Trap */
447 log_trap_info(sm->p_log, p_ntci, source_lid, p_smp->trans_id);
448 }
449
450 osm_dump_notice_v2(sm->p_log, p_ntci, FILE_ID, OSM_LOG_VERBOSE);
451 CL_PLOCK_ACQUIRE(sm->p_lock);
452 p_physp = osm_get_physp_by_mad_addr(sm->p_log, sm->p_subn,
453 &tmp_madw.mad_addr);
454 if (p_physp)
455 p_smp->m_key = ib_port_info_get_m_key(&p_physp->port_info);
456 else
457 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: "
458 "Failed to find source physical port for trap\n");
459
460 status = osm_resp_send(sm, &tmp_madw, 0, payload);
461 if (status != IB_SUCCESS) {
462 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: "
463 "Error sending response (%s)\n",
464 ib_get_err_str(status));
465 goto Exit;
466 }
467
468 /*
469 * We would like to filter out recurring Traps so we track them by
470 * their source lid and content. If the same trap was already
471 * received within the aging time window more than 10 times,
472 * we simply ignore it. This is done only if we are in smi mode
473 */
474
475 if (is_gsi == FALSE) {
476 if (ib_notice_is_generic(p_ntci) &&
477 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP) ||
478 p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP) ||
479 p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))) {
480 /* If this is a trap 129, 130, or 131 - then this is a
481 * trap signaling a change on a physical port.
482 * Mark the physp_change_trap flag as TRUE.
483 */
484 physp_change_trap = TRUE;
485 /* The source_lid should be based on the source_lid from the trap */
486 source_lid = p_ntci->data_details.ntc_129_131.lid;
487 port_num = p_ntci->data_details.ntc_129_131.port_num;
488 }
489
490 /* try to find it in the aging tracker */
491 trap_key = trap_get_key(source_lid, port_num, p_ntci);
492 num_received = cl_event_wheel_num_regs(&sm->trap_aging_tracker,
493 trap_key);
494
495 /* Now we know how many times it provided this trap */
496 if (num_received > 10) {
497 if (print_num_received(num_received))
498 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
499 "Received trap %u times consecutively\n",
500 num_received);
501 /*
502 * If the trap provides info about a bad port
503 * we mark it as unhealthy.
504 */
505 if (physp_change_trap == TRUE) {
506 int ret = shutup_noisy_port(sm, source_lid,
507 port_num,
508 num_received);
509 if (ret == 1) /* port disabled */
510 goto Exit;
511 else if (ret == 2) /* unhealthy - run sweep */
512 run_heavy_sweep = TRUE;
513 /* in any case increase timeout interval */
514 event_wheel_timeout =
515 OSM_DEFAULT_UNHEALTHY_TIMEOUT;
516 }
517 }
518
519 /* restart the aging anyway */
520 /* If physp_change_trap is TRUE - then use a callback to unset
521 the healthy bit. If not - no need to use a callback. */
522 if (physp_change_trap == TRUE)
523 cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key,
524 cl_get_time_stamp() + event_wheel_timeout,
525 aging_tracker_callback, sm);
526 else
527 cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key,
528 cl_get_time_stamp() + event_wheel_timeout,
529 NULL, NULL);
530
531 /* If was already registered do nothing more */
532 if (num_received > 10 && run_heavy_sweep == FALSE) {
533 if (print_num_received(num_received))
534 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
535 "Ignoring noisy traps.\n");
536 goto Exit;
537 }
538 }
539
540 /* Check for node description update. IB Spec v1.2.1 pg 823 */
541 if (!ib_notice_is_generic(p_ntci))
542 goto check_sweep;
543 if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP &&
544 p_ntci->data_details.ntc_144.local_changes & TRAP_144_MASK_OTHER_LOCAL_CHANGES &&
545 p_ntci->data_details.ntc_144.change_flgs & TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) {
546 OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description update\n");
547
548 if (p_physp) {
549 osm_req_get_node_desc(sm, p_physp);
550 if (!(p_ntci->data_details.ntc_144.change_flgs & ~TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) &&
551 p_ntci->data_details.ntc_144.new_cap_mask == p_physp->port_info.capability_mask)
552 goto check_report;
553 } else
554 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
555 "ERR 3812: No physical port found for "
556 "trap 144: \"node description update\"\n");
557 goto check_sweep;
558 } else if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_SYS_IMG_GUID_CHANGED_TRAP) {
559 if (p_physp) {
560 CL_PLOCK_RELEASE(sm->p_lock);
561 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
562 p_physp = osm_get_physp_by_mad_addr(sm->p_log,
563 sm->p_subn,
564 &tmp_madw.mad_addr);
565 if (p_physp) {
566 /* this assumes that trap 145 content is not broken? */
567 p_physp->p_node->node_info.sys_guid =
568 p_ntci->data_details.ntc_145.new_sys_guid;
569 }
570 CL_PLOCK_RELEASE(sm->p_lock);
571 CL_PLOCK_ACQUIRE(sm->p_lock);
572 } else
573 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
574 "ERR 3813: No physical port found for "
575 "trap 145: \"SystemImageGUID update\"\n");
576 goto check_report;
577 }
578
579 check_sweep:
580 if (osm_log_is_active_v2(sm->p_log, OSM_LOG_INFO, FILE_ID)) {
581 if (ib_notice_is_generic(p_ntci) &&
582 cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP) {
583 p_path = (p_physp) ?
584 osm_physp_get_dr_path_ptr(p_physp) : NULL;
585 if (p_path) {
586 n = sprintf(buf, "SM class trap %u: ",
587 cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
588 n += snprintf(buf + n, sizeof(buf) - n,
589 "Directed Path Dump of %u hop path: "
590 "Path = ", p_path->hop_count);
591
592 osm_dump_dr_path_as_buf(sizeof(buf) - n, p_path,
593 buf + n);
594
595 osm_log_v2(sm->p_log, OSM_LOG_INFO, FILE_ID,
596 "%s\n", buf);
597 }
598 }
599 }
600
601 /* do a sweep if we received a trap */
602 if (sm->p_subn->opt.sweep_on_trap) {
603 /* if this is trap number 128 or run_heavy_sweep is TRUE -
604 update the force_heavy_sweep flag of the subnet.
605 Sweep also on traps 144 - these traps signal a change of
606 certain port capabilities.
607 TODO: In the future this can be changed to just getting
608 PortInfo on this port instead of sweeping the entire subnet. */
609 if (ib_notice_is_generic(p_ntci) &&
610 (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP ||
611 cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP ||
612 run_heavy_sweep)) {
613 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
614 "Forcing heavy sweep. Received trap:%u\n",
615 cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
616
617 sm->p_subn->force_heavy_sweep = TRUE;
618 }
619 osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
620 }
621
622 /* If we reached here due to trap 129/130/131 - do not need to do
623 the notice report. Just goto exit. We know this is the case
624 if physp_change_trap is TRUE. */
625 if (physp_change_trap == TRUE)
626 goto Exit;
627
628 check_report:
629 /* We are going to report the notice - so need to fix the IssuerGID
630 accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */
631 if (is_gsi) {
632 if (!tmp_madw.mad_addr.addr_type.gsi.global_route) {
633 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: "
634 "Received gsi trap with global_route FALSE. "
635 "Cannot update issuer_gid!\n");
636 goto Exit;
637 }
638 memcpy(&p_ntci->issuer_gid,
639 &tmp_madw.mad_addr.addr_type.gsi.grh_info.src_gid,
640 sizeof(ib_gid_t));
641 } else {
642 /* Need to use the IssuerLID */
643 p_port = osm_get_port_by_lid(sm->p_subn, source_lid);
644 if (!p_port) {
645 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
646 "Cannot find port corresponding to lid:%u\n",
647 cl_ntoh16(source_lid));
648
649 goto Exit;
650 }
651
652 p_ntci->issuer_gid.unicast.prefix =
653 sm->p_subn->opt.subnet_prefix;
654 p_ntci->issuer_gid.unicast.interface_id = p_port->guid;
655 }
656
657 /* we need a lock here as the InformInfo DB must be stable */
658 status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci);
659 if (status != IB_SUCCESS) {
660 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3803: "
661 "Error sending trap reports (%s)\n",
662 ib_get_err_str(status));
663 goto Exit;
664 }
665
666 Exit:
667 CL_PLOCK_RELEASE(sm->p_lock);
668 Exit2:
669 OSM_LOG_EXIT(sm->p_log);
670 }
671
/*
 * Entry point for received Trap MADs.
 *
 * context - the osm_sm_t to process the trap with.
 * data    - the osm_madw_t wrapping the received MAD; must not be NULL.
 *
 * Asserts that the MAD is a request rather than a response, then hands
 * it to trap_rcv_process_request().
 */
void osm_trap_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = (osm_sm_t *) context;
	osm_madw_t *p_trap_madw = (osm_madw_t *) data;
	/* only read by the assertion below */
	ib_smp_t __attribute__((unused)) *p_smp;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_trap_madw);

	/* Only Trap requests get here */
	p_smp = osm_madw_get_smp_ptr(p_trap_madw);
	CL_ASSERT(!ib_smp_is_response(p_smp));

	trap_rcv_process_request(sm, p_trap_madw);

	OSM_LOG_EXIT(sm->p_log);
}
690