1 /*
2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37 /*
38 * Abstract:
39 * Implementation of osm_sm_t.
40 * This object represents the SM Receiver object.
41 * This object is part of the opensm family of objects.
42 */
43
44 #if HAVE_CONFIG_H
45 # include <config.h>
46 #endif /* HAVE_CONFIG_H */
47
48 #include <stdlib.h>
49 #include <string.h>
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <complib/cl_thread.h>
55 #include <opensm/osm_file_ids.h>
56 #define FILE_ID OSM_FILE_SM_C
57 #include <opensm/osm_sm.h>
58 #include <opensm/osm_madw.h>
59 #include <opensm/osm_log.h>
60 #include <opensm/osm_node.h>
61 #include <opensm/osm_msgdef.h>
62 #include <opensm/osm_perfmgr.h>
63 #include <opensm/osm_opensm.h>
64
65 #define OSM_SM_INITIAL_TID_VALUE 0x1233
66
67 extern void osm_lft_rcv_process(IN void *context, IN void *data);
68 extern void osm_mft_rcv_process(IN void *context, IN void *data);
69 extern void osm_nd_rcv_process(IN void *context, IN void *data);
70 extern void osm_ni_rcv_process(IN void *context, IN void *data);
71 extern void osm_pkey_rcv_process(IN void *context, IN void *data);
72 extern void osm_pi_rcv_process(IN void *context, IN void *data);
73 extern void osm_gi_rcv_process(IN void *context, IN void *data);
74 extern void osm_slvl_rcv_process(IN void *context, IN void *p_data);
75 extern void osm_sminfo_rcv_process(IN void *context, IN void *data);
76 extern void osm_si_rcv_process(IN void *context, IN void *data);
77 extern void osm_trap_rcv_process(IN void *context, IN void *data);
78 extern void osm_vla_rcv_process(IN void *context, IN void *data);
79 extern void osm_mlnx_epi_rcv_process(IN void *context, IN void *data);
80
81 extern void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal);
82 extern void osm_sm_state_mgr_polling_callback(IN void *context);
83
/*
 * Dispatch one signal taken from the sweeper's pending mask.
 *
 * When ENABLE_OSM_PERF_MGR is defined, OSM_SIGNAL_PERFMGR_SWEEP is routed
 * to osm_perfmgr_process(); every other signal (and every signal when the
 * performance manager is compiled out) goes to osm_state_mgr_process().
 */
static void sm_process(osm_sm_t * sm, osm_signal_t signal)
{
#ifdef ENABLE_OSM_PERF_MGR
	if (signal == OSM_SIGNAL_PERFMGR_SWEEP) {
		osm_perfmgr_process(&sm->p_subn->p_osm->perfmgr);
		return;
	}
#endif
	osm_state_mgr_process(sm, signal);
}
93
/*
 * Thread body of the sweeper.
 *
 * p_ptr is the osm_sm_t the thread serves. Each iteration blocks on
 * signal_event, snapshots and clears signal_mask under signal_lock, and
 * hands every set bit to sm_process() in ascending bit order. The loop
 * ends when thread_state is no longer OSM_THREAD_STATE_RUN, or when
 * osm_exit_flag is found set after a successful wait.
 */
static void sm_sweeper(IN void *p_ptr)
{
	osm_sm_t *p_sm = p_ptr;
	ib_api_status_t status;
	unsigned pending, bit;

	OSM_LOG_ENTER(p_sm->p_log);

	while (p_sm->thread_state == OSM_THREAD_STATE_RUN) {
		/*
		 * Block until the event is signalled; EVENT_NO_TIMEOUT is
		 * passed, so processing is started only by someone
		 * signalling the event.
		 */
		status = cl_event_wait_on(&p_sm->signal_event,
					  EVENT_NO_TIMEOUT, TRUE);

		if (status != CL_SUCCESS) {
			OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E01: "
				"Event wait failed (%s)\n",
				CL_STATUS_MSG(status));
			continue;
		}

		OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
			"Off schedule sweep signalled\n");

		if (osm_exit_flag)
			break;

		/* Take the pending signals and reset the mask atomically. */
		cl_spinlock_acquire(&p_sm->signal_lock);
		pending = p_sm->signal_mask;
		p_sm->signal_mask = 0;
		cl_spinlock_release(&p_sm->signal_lock);

		/* Bit position N in the mask corresponds to signal N. */
		bit = 0;
		while (pending) {
			if (pending & 1)
				sm_process(p_sm, bit);
			pending >>= 1;
			bit++;
		}
	}

	OSM_LOG_EXIT(p_sm->p_log);
}
136
sm_sweep(void * arg)137 static void sm_sweep(void *arg)
138 {
139 osm_sm_t *sm = arg;
140
141 /* do the sweep only if we are in MASTER state */
142 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER ||
143 sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
144 osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
145 cl_timer_start(&sm->sweep_timer, sm->p_subn->opt.sweep_interval * 1000);
146 }
147
/*
 * Dispatcher callback registered for OSM_MSG_LIGHT_SWEEP_FAIL.
 *
 * context is the osm_sm_t given at registration; p_data is not used.
 * Logs the failure and sets force_heavy_sweep on the subnet.
 */
static void sweep_fail_process(IN void *context, IN void *p_data)
{
	osm_sm_t *p_sm = context;

	OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG, "light sweep failed\n");

	p_sm->p_subn->force_heavy_sweep = TRUE;
}
155
/*
 * Put an SM object into its constructed (not yet initialized) state.
 *
 * The whole object is zeroed, the thread state is set to
 * OSM_THREAD_STATE_NONE, the transaction id is seeded with
 * OSM_SM_INITIAL_TID_VALUE, and every member that has a construct
 * routine is constructed.
 *
 * sweep_timer is constructed here as well: osm_sm_shutdown() stops it and
 * osm_sm_destroy() destroys it, exactly as they do for polling_timer, so
 * both timers need to be in the same constructed state when
 * initialization did not get far enough to run cl_timer_init() on them.
 */
void osm_sm_construct(IN osm_sm_t * p_sm)
{
	memset(p_sm, 0, sizeof(*p_sm));
	p_sm->thread_state = OSM_THREAD_STATE_NONE;
	p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE;
	cl_spinlock_construct(&p_sm->signal_lock);
	cl_spinlock_construct(&p_sm->state_lock);
	cl_timer_construct(&p_sm->sweep_timer);
	cl_timer_construct(&p_sm->polling_timer);
	cl_event_construct(&p_sm->signal_event);
	cl_event_construct(&p_sm->subnet_up_event);
	cl_event_wheel_construct(&p_sm->trap_aging_tracker);
	cl_thread_construct(&p_sm->sweeper);
	osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl);
	osm_lid_mgr_construct(&p_sm->lid_mgr);
	osm_ucast_mgr_construct(&p_sm->ucast_mgr);
}
172
/*
 * Stop the SM's activity: wake and destroy the sweeper thread, stop both
 * timers, destroy the MAD controller and unregister every dispatcher
 * handle held by the SM object.
 */
void osm_sm_shutdown(IN osm_sm_t * p_sm)
{
	boolean_t wake_sweeper;

	OSM_LOG_ENTER(p_sm->p_log);

	/*
	 * The thread state leaves NONE only at the end of osm_sm_init(),
	 * after signal_event has been initialized, so it doubles as the
	 * "event is safe to signal" indicator. Sample it before it is
	 * overwritten below.
	 */
	wake_sweeper = (p_sm->thread_state != OSM_THREAD_STATE_NONE);

	/* Tell the sweeper loop that it has to leave. */
	p_sm->thread_state = OSM_THREAD_STATE_EXIT;

	if (wake_sweeper)
		cl_event_signal(&p_sm->signal_event);

	/* The thread goes away before any of the objects it uses. */
	cl_timer_stop(&p_sm->polling_timer);
	cl_timer_stop(&p_sm->sweep_timer);
	cl_thread_destroy(&p_sm->sweeper);

	/*
	 * The controller is destroyed ahead of the receivers so that all
	 * callbacks coming from the dispatcher have completed by the time
	 * the handles are unregistered.
	 */
	osm_sm_mad_ctrl_destroy(&p_sm->mad_ctrl);

	cl_disp_unregister(p_sm->ni_disp_h);
	cl_disp_unregister(p_sm->pi_disp_h);
	cl_disp_unregister(p_sm->gi_disp_h);
	cl_disp_unregister(p_sm->si_disp_h);
	cl_disp_unregister(p_sm->nd_disp_h);
	cl_disp_unregister(p_sm->lft_disp_h);
	cl_disp_unregister(p_sm->mft_disp_h);
	cl_disp_unregister(p_sm->sm_info_disp_h);
	cl_disp_unregister(p_sm->trap_disp_h);
	cl_disp_unregister(p_sm->slvl_disp_h);
	cl_disp_unregister(p_sm->vla_disp_h);
	cl_disp_unregister(p_sm->pkey_disp_h);
	cl_disp_unregister(p_sm->mlnx_epi_disp_h);
	cl_disp_unregister(p_sm->sweep_fail_disp_h);

	OSM_LOG_EXIT(p_sm->p_log);
}
221
/*
 * Release the resources owned by the SM object: the LID and unicast
 * managers, the trap aging tracker, both timers, both events, both
 * spinlocks and the MLID request table.
 *
 * mlids_req is reset to NULL once freed so the object does not keep a
 * dangling pointer to the released table.
 */
void osm_sm_destroy(IN osm_sm_t * p_sm)
{
	OSM_LOG_ENTER(p_sm->p_log);
	osm_lid_mgr_destroy(&p_sm->lid_mgr);
	osm_ucast_mgr_destroy(&p_sm->ucast_mgr);
	cl_event_wheel_destroy(&p_sm->trap_aging_tracker);
	cl_timer_destroy(&p_sm->sweep_timer);
	cl_timer_destroy(&p_sm->polling_timer);
	cl_event_destroy(&p_sm->signal_event);
	cl_event_destroy(&p_sm->subnet_up_event);
	cl_spinlock_destroy(&p_sm->signal_lock);
	cl_spinlock_destroy(&p_sm->state_lock);
	free(p_sm->mlids_req);
	p_sm->mlids_req = NULL;

	osm_log_v2(p_sm->p_log, OSM_LOG_SYS, FILE_ID, "Exiting SM\n");	/* Format Waived */
	OSM_LOG_EXIT(p_sm->p_log);
}
239
osm_sm_init(IN osm_sm_t * p_sm,IN osm_subn_t * p_subn,IN osm_db_t * p_db,IN osm_vendor_t * p_vendor,IN osm_mad_pool_t * p_mad_pool,IN osm_vl15_t * p_vl15,IN osm_log_t * p_log,IN osm_stats_t * p_stats,IN cl_dispatcher_t * p_disp,IN cl_plock_t * p_lock)240 ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN osm_subn_t * p_subn,
241 IN osm_db_t * p_db, IN osm_vendor_t * p_vendor,
242 IN osm_mad_pool_t * p_mad_pool,
243 IN osm_vl15_t * p_vl15, IN osm_log_t * p_log,
244 IN osm_stats_t * p_stats,
245 IN cl_dispatcher_t * p_disp, IN cl_plock_t * p_lock)
246 {
247 ib_api_status_t status;
248
249 OSM_LOG_ENTER(p_log);
250
251 p_sm->p_subn = p_subn;
252 p_sm->p_db = p_db;
253 p_sm->p_vendor = p_vendor;
254 p_sm->p_mad_pool = p_mad_pool;
255 p_sm->p_vl15 = p_vl15;
256 p_sm->p_log = p_log;
257 p_sm->p_disp = p_disp;
258 p_sm->p_lock = p_lock;
259
260 status = cl_spinlock_init(&p_sm->signal_lock);
261 if (status != CL_SUCCESS)
262 goto Exit;
263
264 status = cl_spinlock_init(&p_sm->state_lock);
265 if (status != CL_SUCCESS)
266 goto Exit;
267
268 status = cl_event_init(&p_sm->signal_event, FALSE);
269 if (status != CL_SUCCESS)
270 goto Exit;
271
272 status = cl_event_init(&p_sm->subnet_up_event, FALSE);
273 if (status != CL_SUCCESS)
274 goto Exit;
275
276 status = cl_timer_init(&p_sm->sweep_timer, sm_sweep, p_sm);
277 if (status != CL_SUCCESS)
278 goto Exit;
279
280 status = cl_timer_init(&p_sm->polling_timer,
281 osm_sm_state_mgr_polling_callback, p_sm);
282 if (status != CL_SUCCESS)
283 goto Exit;
284
285 p_sm->mlids_req_max = 0;
286 p_sm->mlids_req = malloc((IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO +
287 1) * sizeof(p_sm->mlids_req[0]));
288 if (!p_sm->mlids_req)
289 goto Exit;
290 memset(p_sm->mlids_req, 0,
291 (IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO +
292 1) * sizeof(p_sm->mlids_req[0]));
293
294 status = osm_sm_mad_ctrl_init(&p_sm->mad_ctrl, p_sm->p_subn,
295 p_sm->p_mad_pool, p_sm->p_vl15,
296 p_sm->p_vendor,
297 p_log, p_stats, p_lock, p_disp);
298 if (status != IB_SUCCESS)
299 goto Exit;
300
301 status = cl_event_wheel_init(&p_sm->trap_aging_tracker);
302 if (status != IB_SUCCESS)
303 goto Exit;
304
305 status = osm_lid_mgr_init(&p_sm->lid_mgr, p_sm);
306 if (status != IB_SUCCESS)
307 goto Exit;
308
309 status = osm_ucast_mgr_init(&p_sm->ucast_mgr, p_sm);
310 if (status != IB_SUCCESS)
311 goto Exit;
312
313 status = IB_INSUFFICIENT_RESOURCES;
314 p_sm->sweep_fail_disp_h = cl_disp_register(p_disp,
315 OSM_MSG_LIGHT_SWEEP_FAIL,
316 sweep_fail_process, p_sm);
317 if (p_sm->sweep_fail_disp_h == CL_DISP_INVALID_HANDLE)
318 goto Exit;
319
320 p_sm->ni_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_INFO,
321 osm_ni_rcv_process, p_sm);
322 if (p_sm->ni_disp_h == CL_DISP_INVALID_HANDLE)
323 goto Exit;
324
325 p_sm->pi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORT_INFO,
326 osm_pi_rcv_process, p_sm);
327 if (p_sm->pi_disp_h == CL_DISP_INVALID_HANDLE)
328 goto Exit;
329
330 p_sm->gi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_GUID_INFO,
331 osm_gi_rcv_process, p_sm);
332 if (p_sm->gi_disp_h == CL_DISP_INVALID_HANDLE)
333 goto Exit;
334
335 p_sm->si_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO,
336 osm_si_rcv_process, p_sm);
337 if (p_sm->si_disp_h == CL_DISP_INVALID_HANDLE)
338 goto Exit;
339
340 p_sm->nd_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_DESC,
341 osm_nd_rcv_process, p_sm);
342 if (p_sm->nd_disp_h == CL_DISP_INVALID_HANDLE)
343 goto Exit;
344
345 p_sm->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT,
346 osm_lft_rcv_process, p_sm);
347 if (p_sm->lft_disp_h == CL_DISP_INVALID_HANDLE)
348 goto Exit;
349
350 p_sm->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT,
351 osm_mft_rcv_process, p_sm);
352 if (p_sm->mft_disp_h == CL_DISP_INVALID_HANDLE)
353 goto Exit;
354
355 p_sm->sm_info_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SM_INFO,
356 osm_sminfo_rcv_process, p_sm);
357 if (p_sm->sm_info_disp_h == CL_DISP_INVALID_HANDLE)
358 goto Exit;
359
360 p_sm->trap_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NOTICE,
361 osm_trap_rcv_process, p_sm);
362 if (p_sm->trap_disp_h == CL_DISP_INVALID_HANDLE)
363 goto Exit;
364
365 p_sm->slvl_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SLVL,
366 osm_slvl_rcv_process, p_sm);
367 if (p_sm->slvl_disp_h == CL_DISP_INVALID_HANDLE)
368 goto Exit;
369
370 p_sm->vla_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_VL_ARB,
371 osm_vla_rcv_process, p_sm);
372 if (p_sm->vla_disp_h == CL_DISP_INVALID_HANDLE)
373 goto Exit;
374
375 p_sm->pkey_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PKEY,
376 osm_pkey_rcv_process, p_sm);
377 if (p_sm->pkey_disp_h == CL_DISP_INVALID_HANDLE)
378 goto Exit;
379
380 p_sm->mlnx_epi_disp_h = cl_disp_register(p_disp,
381 OSM_MSG_MAD_MLNX_EXT_PORT_INFO,
382 osm_mlnx_epi_rcv_process, p_sm);
383 if (p_sm->mlnx_epi_disp_h == CL_DISP_INVALID_HANDLE)
384 goto Exit;
385
386 p_subn->sm_state = p_subn->opt.sm_inactive ?
387 IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING;
388 osm_report_sm_state(p_sm);
389
390 /*
391 * Now that the component objects are initialized, start
392 * the sweeper thread if the user wants sweeping.
393 */
394 p_sm->thread_state = OSM_THREAD_STATE_RUN;
395 status = cl_thread_init(&p_sm->sweeper, sm_sweeper, p_sm,
396 "opensm sweeper");
397 if (status != IB_SUCCESS)
398 goto Exit;
399
400 if (p_sm->p_subn->opt.sweep_interval)
401 cl_timer_start(&p_sm->sweep_timer,
402 p_sm->p_subn->opt.sweep_interval * 1000);
403
404 Exit:
405 OSM_LOG_EXIT(p_log);
406 return status;
407 }
408
/*
 * Post a signal to the sweeper thread.
 *
 * The bit numbered `signal` is set in signal_mask and signal_event is
 * signalled, both while holding signal_lock. The bit is built from an
 * unsigned constant because the mask is handled as an unsigned bit set
 * by the sweeper; a signed shift would be undefined for the top bit.
 */
void osm_sm_signal(osm_sm_t * p_sm, osm_signal_t signal)
{
	cl_spinlock_acquire(&p_sm->signal_lock);
	p_sm->signal_mask |= 1U << signal;
	cl_event_signal(&p_sm->signal_event);
	cl_spinlock_release(&p_sm->signal_lock);
}
416
/*
 * Request a sweep right away by posting OSM_SIGNAL_SWEEP to the sweeper
 * thread; entry and exit are traced in the SM log.
 */
void osm_sm_sweep(IN osm_sm_t * p_sm)
{
	OSM_LOG_ENTER(p_sm->p_log);

	osm_sm_signal(p_sm, OSM_SIGNAL_SWEEP);

	OSM_LOG_EXIT(p_sm->p_log);
}
423
osm_sm_bind(IN osm_sm_t * p_sm,IN ib_net64_t port_guid)424 ib_api_status_t osm_sm_bind(IN osm_sm_t * p_sm, IN ib_net64_t port_guid)
425 {
426 ib_api_status_t status;
427
428 OSM_LOG_ENTER(p_sm->p_log);
429
430 status = osm_sm_mad_ctrl_bind(&p_sm->mad_ctrl, port_guid);
431
432 if (status != IB_SUCCESS) {
433 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E10: "
434 "SM MAD Controller bind failed (%s)\n",
435 ib_get_err_str(status));
436 goto Exit;
437 }
438
439 Exit:
440 OSM_LOG_EXIT(p_sm->p_log);
441 return status;
442 }
443
/*
 * Flag a multicast LID for rerouting and wake the sweeper thread.
 *
 * mlid is given in network byte order. Its host-order value must lie in
 * [IB_LID_MCAST_START_HO, IB_LID_MCAST_END_HO], the range the mlids_req
 * table is sized for in osm_sm_init(). A value outside that range is
 * logged and ignored instead of being used as an index: below the start
 * the 16-bit subtraction would wrap, above the end the index would be
 * one past the table.
 *
 * For a valid MLID the table entry is set to 1, mlids_req_max is raised
 * if needed, and OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST is posted.
 */
void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid)
{
	uint16_t mlid_ho = cl_ntoh16(mlid);
	uint16_t idx;

	if (mlid_ho < IB_LID_MCAST_START_HO || mlid_ho > IB_LID_MCAST_END_HO) {
		/*
		 * NOTE(review): no numbered "ERR 2Exx" code is attached
		 * here; assign one according to the project's error-code
		 * registry.
		 */
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"rerouting requested for invalid MLID 0x%x\n",
			mlid_ho);
		return;
	}

	idx = mlid_ho - IB_LID_MCAST_START_HO;
	sm->mlids_req[idx] = 1;
	if (sm->mlids_req_max < idx)
		sm->mlids_req_max = idx;
	osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
	OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "rerouting requested for MLID 0x%x\n",
		mlid_ho);
}
454
/*
 * Store a new SM priority in the subnet options.
 *
 * Trap 144 with TRAP_144_MASK_SM_PRIORITY_CHANGE is sent only when the
 * new priority is strictly higher than the previous one and the SM is
 * currently in STANDBY state.
 */
void osm_set_sm_priority(osm_sm_t * sm, uint8_t priority)
{
	const uint8_t previous = sm->p_subn->opt.sm_priority;

	sm->p_subn->opt.sm_priority = priority;

	/* Unchanged or lowered priority: nothing to announce. */
	if (previous >= priority)
		return;

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_STANDBY)
		return;

	osm_send_trap144(sm, TRAP_144_MASK_SM_PRIORITY_CHANGE);
}
465